Diffstat (limited to 'Tools')
43 files changed, 3244 insertions, 644 deletions
diff --git a/Tools/README b/Tools/README
index c1f89ba..0d961de 100644
--- a/Tools/README
+++ b/Tools/README
@@ -3,7 +3,7 @@ while building or extending Python.
 
 buildbot        Batchfiles for running on Windows buildslaves.
 
-ccbench         A Python concurrency benchmark.
+ccbench         A Python threads-based concurrency benchmark. (*)
 
 demo            Several Python programming demos.
 
@@ -17,13 +17,13 @@ i18n            Tools for internationalization. pygettext.py
                 and msgfmt.py generates a binary message catalog
                 from a catalog in text format.
 
-iobench         Benchmark for the new Python I/O system.
+iobench         Benchmark for the new Python I/O system. (*)
 
 msi             Support for packaging Python as an MSI package on Windows.
 
 parser          Un-parsing tool to generate code from an AST.
 
-pybench         Comprehensive Python benchmarking suite.
+pybench         Low-level benchmarking for the Python evaluation loop. (*)
 
 pynche          A Tkinter-based color editor.
 
@@ -32,6 +32,9 @@ scripts         A number of useful single-file programs, e.g.
                 tabnanny.py tabs and spaces, and 2to3, which converts
                 Python 2 code to Python 3 code.
 
+stringbench     A suite of micro-benchmarks for various operations on
+                strings (both 8-bit and unicode). (*)
+
 test2to3        A demonstration of how to use 2to3 transparently in setup.py.
 
 unicode         Tools for generating unicodedata and codecs from unicode.org
@@ -40,3 +43,6 @@ unicode         Tools for generating unicodedata and codecs from unicode.org
 
 unittestgui     A Tkinter based GUI test runner for unittest, with test
                 discovery.
+
+
+(*) A generic benchmark suite is maintained separately at http://hg.python.org/benchmarks/
diff --git a/Tools/buildbot/build-amd64.bat b/Tools/buildbot/build-amd64.bat
index 8713b38..7ee7b2d 100644
--- a/Tools/buildbot/build-amd64.bat
+++ b/Tools/buildbot/build-amd64.bat
@@ -1,7 +1,7 @@
 @rem Used by the buildbot "compile" step.
-set HOST_PYTHON="%CD%\PCbuild\amd64\python_d.exe"
 cmd /c Tools\buildbot\external-amd64.bat
-call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64
+call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64
 cmd /c Tools\buildbot\clean-amd64.bat
-vcbuild /useenv PCbuild\kill_python.vcproj "Debug|x64" && PCbuild\amd64\kill_python_d.exe
-vcbuild PCbuild\pcbuild.sln "Debug|x64"
+msbuild PCbuild\kill_python.vcxproj /p:Configuration=Debug /p:PlatformTarget=x64
+PCbuild\amd64\kill_python_d.exe
+msbuild PCbuild\pcbuild.sln /p:Configuration=Debug /p:Platform=x64
diff --git a/Tools/buildbot/build.bat b/Tools/buildbot/build.bat
index ab3ea7d..c93cfd5 100644
--- a/Tools/buildbot/build.bat
+++ b/Tools/buildbot/build.bat
@@ -1,7 +1,8 @@
 @rem Used by the buildbot "compile" step.
 cmd /c Tools\buildbot\external.bat
-call "%VS90COMNTOOLS%vsvars32.bat"
+call "%VS100COMNTOOLS%vsvars32.bat"
 cmd /c Tools\buildbot\clean.bat
-vcbuild /useenv PCbuild\kill_python.vcproj "Debug|Win32" && PCbuild\kill_python_d.exe
-vcbuild /useenv PCbuild\pcbuild.sln "Debug|Win32"
+msbuild PCbuild\kill_python.vcxproj /p:Configuration=Debug /p:PlatformTarget=x86
+PCbuild\kill_python_d.exe
+msbuild PCbuild\pcbuild.sln /p:Configuration=Debug /p:Platform=Win32
diff --git a/Tools/buildbot/buildmsi.bat b/Tools/buildbot/buildmsi.bat
index 4430cb8..ae93e67 100644
--- a/Tools/buildbot/buildmsi.bat
+++ b/Tools/buildbot/buildmsi.bat
@@ -2,10 +2,10 @@
 cmd /c Tools\buildbot\external.bat
 
 @rem build release versions of things
-call "%VS90COMNTOOLS%vsvars32.bat"
+call "%VS100COMNTOOLS%vsvars32.bat"
 
 @rem build Python
-vcbuild /useenv PCbuild\pcbuild.sln "Release|Win32"
+msbuild /p:useenv=true PCbuild\pcbuild.sln /p:Configuration=Release /p:Platform=Win32
 
 @rem build the documentation
 bash.exe -c 'cd Doc;make PYTHON=python2.5 update htmlhelp'
diff --git a/Tools/buildbot/clean-amd64.bat b/Tools/buildbot/clean-amd64.bat
index 715805a..24660af 100644
--- a/Tools/buildbot/clean-amd64.bat
+++ b/Tools/buildbot/clean-amd64.bat
@@ -1,10 +1,10 @@
 @rem Used by the buildbot "clean" step.
-call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64
+call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64
 @echo Deleting .pyc/.pyo files ...
 del /s Lib\*.pyc Lib\*.pyo
 @echo Deleting test leftovers ...
 rmdir /s /q build
 cd PCbuild
-vcbuild /clean pcbuild.sln "Release|x64"
-vcbuild /clean pcbuild.sln "Debug|x64"
+msbuild /target:clean pcbuild.sln /p:Configuration=Release /p:PlatformTarget=x64
+msbuild /target:clean pcbuild.sln /p:Configuration=Debug /p:PlatformTarget=x64
 cd ..
diff --git a/Tools/buildbot/clean.bat b/Tools/buildbot/clean.bat
index 0c04b8e..218facc 100644
--- a/Tools/buildbot/clean.bat
+++ b/Tools/buildbot/clean.bat
@@ -1,8 +1,8 @@
 @rem Used by the buildbot "clean" step.
-call "%VS90COMNTOOLS%vsvars32.bat"
+call "%VS100COMNTOOLS%vsvars32.bat"
 @echo Deleting test leftovers ...
 rmdir /s /q build
 cd PCbuild
-vcbuild /clean pcbuild.sln "Release|Win32"
-vcbuild /clean pcbuild.sln "Debug|Win32"
+msbuild /target:clean pcbuild.sln /p:Configuration=Release /p:PlatformTarget=x86
+msbuild /target:clean pcbuild.sln /p:Configuration=Debug /p:PlatformTarget=x86
 cd ..
diff --git a/Tools/buildbot/external-amd64.bat b/Tools/buildbot/external-amd64.bat
index 954238e..d2ff255 100644
--- a/Tools/buildbot/external-amd64.bat
+++ b/Tools/buildbot/external-amd64.bat
@@ -2,20 +2,20 @@
 @rem Assume we start inside the Python source directory
 call "Tools\buildbot\external-common.bat"
-call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64
+call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64
 
 if not exist tcltk64\bin\tcl85g.dll (
-    cd tcl-8.5.9.0\win
+    cd tcl-8.5.11.0\win
     nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 clean all
     nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 install
     cd ..\..
 )
 
 if not exist tcltk64\bin\tk85g.dll (
-    cd tk-8.5.9.0\win
-    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.9.0 clean
-    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.9.0 all
-    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.9.0 install
+    cd tk-8.5.11.0\win
+    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 clean
+    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 all
+    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 install
     cd ..\..
 )
diff --git a/Tools/buildbot/external-common.bat b/Tools/buildbot/external-common.bat
index 2165b8a..4e9fe41 100644
--- a/Tools/buildbot/external-common.bat
+++ b/Tools/buildbot/external-common.bat
@@ -14,8 +14,8 @@ cd ..
 @rem if exist tk8.4.16 rd /s/q tk8.4.16
 @rem if exist tk-8.4.18.1 rd /s/q tk-8.4.18.1
 @rem if exist db-4.4.20 rd /s/q db-4.4.20
-@rem if exist openssl-1.0.0j rd /s/q openssl-1.0.0j
-@rem if exist sqlite-3.7.4 rd /s/q sqlite-3.7.4
+@rem if exist openssl-1.0.1c rd /s/q openssl-1.0.1c
+@rem if exist sqlite-3.7.12 rd /s/q sqlite-3.7.12
 
 @rem bzip
 if not exist bzip2-1.0.6 (
@@ -24,17 +24,25 @@ if not exist bzip2-1.0.6 (
 )
 
 @rem OpenSSL
-if not exist openssl-1.0.0j svn export http://svn.python.org/projects/external/openssl-1.0.0j
+if not exist openssl-1.0.1c (
+  rd /s/q openssl-1.0.0j
+  svn export http://svn.python.org/projects/external/openssl-1.0.1c
+)
 
 @rem tcl/tk
-if not exist tcl-8.5.9.0 (
+if not exist tcl-8.5.11.0 (
    rd /s/q tcltk tcltk64
-   svn export http://svn.python.org/projects/external/tcl-8.5.9.0
+   svn export http://svn.python.org/projects/external/tcl-8.5.11.0
 )
-if not exist tk-8.5.9.0 svn export http://svn.python.org/projects/external/tk-8.5.9.0
+if not exist tk-8.5.11.0 svn export http://svn.python.org/projects/external/tk-8.5.11.0
 
 @rem sqlite3
-if not exist sqlite-3.7.4 (
-  rd /s/q sqlite-source-3.6.21
-  svn export http://svn.python.org/projects/external/sqlite-3.7.4
+if not exist sqlite-3.7.12 (
+  rd /s/q sqlite-source-3.7.4
+  svn export http://svn.python.org/projects/external/sqlite-3.7.12
+)
+
+@rem lzma
+if not exist xz-5.0.3 (
+  svn export http://svn.python.org/projects/external/xz-5.0.3
 )
diff --git a/Tools/buildbot/external.bat b/Tools/buildbot/external.bat
index e958fd6..ed5c10e 100644
--- a/Tools/buildbot/external.bat
+++ b/Tools/buildbot/external.bat
@@ -2,20 +2,20 @@
 @rem Assume we start inside the Python source directory
 call "Tools\buildbot\external-common.bat"
-call "%VS90COMNTOOLS%\vsvars32.bat"
+call "%VS100COMNTOOLS%\vsvars32.bat"
 
 if not exist tcltk\bin\tcl85g.dll (
     @rem all and install need to be separate invocations, otherwise nmakehlp is not found on install
-    cd tcl-8.5.9.0\win
+    cd tcl-8.5.11.0\win
     nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 INSTALLDIR=..\..\tcltk clean all
     nmake -f makefile.vc DEBUG=1 INSTALLDIR=..\..\tcltk install
     cd ..\..
 )
 
 if not exist tcltk\bin\tk85g.dll (
-    cd tk-8.5.9.0\win
-    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.9.0 clean
-    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.9.0 all
-    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.9.0 install
+    cd tk-8.5.11.0\win
+    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 clean
+    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 all
+    nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 install
     cd ..\..
 )
diff --git a/Tools/ccbench/ccbench.py b/Tools/ccbench/ccbench.py
index 9f7118f..60cec3e 100644
--- a/Tools/ccbench/ccbench.py
+++ b/Tools/ccbench/ccbench.py
@@ -10,7 +10,6 @@ ccbench, a Python concurrency benchmark.
 import time
 import os
 import sys
-import functools
 import itertools
 import threading
 import subprocess
@@ -435,70 +434,70 @@ def run_bandwidth_client(**kwargs):
 def run_bandwidth_test(func, args, nthreads):
     # Create a listening socket to receive the packets. We use UDP which should
     # be painlessly cross-platform.
-    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-    sock.bind(("127.0.0.1", 0))
-    addr = sock.getsockname()
+    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
+        sock.bind(("127.0.0.1", 0))
+        addr = sock.getsockname()
 
-    duration = BANDWIDTH_DURATION
-    packet_size = BANDWIDTH_PACKET_SIZE
-
-    results = []
-    threads = []
-    end_event = []
-    start_cond = threading.Condition()
-    started = False
-    if nthreads > 0:
-        # Warm up
-        func(*args)
+        duration = BANDWIDTH_DURATION
+        packet_size = BANDWIDTH_PACKET_SIZE
 
         results = []
-        loop = TimedLoop(func, args)
-        ready = []
-        ready_cond = threading.Condition()
-
-        def run():
+        threads = []
+        end_event = []
+        start_cond = threading.Condition()
+        started = False
+        if nthreads > 0:
+            # Warm up
+            func(*args)
+
+            results = []
+            loop = TimedLoop(func, args)
+            ready = []
+            ready_cond = threading.Condition()
+
+            def run():
+                with ready_cond:
+                    ready.append(None)
+                    ready_cond.notify()
+                with start_cond:
+                    while not started:
+                        start_cond.wait()
+                loop(start_time, duration * 1.5, end_event, do_yield=False)
+
+            for i in range(nthreads):
+                threads.append(threading.Thread(target=run))
+            for t in threads:
+                t.setDaemon(True)
+                t.start()
+            # Wait for threads to be ready
             with ready_cond:
-                ready.append(None)
-                ready_cond.notify()
-            with start_cond:
-                while not started:
-                    start_cond.wait()
-            loop(start_time, duration * 1.5, end_event, do_yield=False)
-
-        for i in range(nthreads):
-            threads.append(threading.Thread(target=run))
-        for t in threads:
-            t.setDaemon(True)
-            t.start()
-        # Wait for threads to be ready
-        with ready_cond:
-            while len(ready) < nthreads:
-                ready_cond.wait()
-
-        # Run the client and wait for the first packet to arrive before
-        # unblocking the background threads.
-        process = run_bandwidth_client(addr=addr,
-                                       packet_size=packet_size,
-                                       duration=duration)
-        _time = time.time
-        # This will also wait for the parent to be ready
-        s = _recv(sock, packet_size)
-        remote_addr = eval(s.partition('#')[0])
-
-        with start_cond:
-            start_time = _time()
-            started = True
-            start_cond.notify(nthreads)
-
-        n = 0
-        first_time = None
-        while not end_event and BW_END not in s:
-            _sendto(sock, s, remote_addr)
+                while len(ready) < nthreads:
+                    ready_cond.wait()
+
+            # Run the client and wait for the first packet to arrive before
+            # unblocking the background threads.
+            process = run_bandwidth_client(addr=addr,
+                                           packet_size=packet_size,
+                                           duration=duration)
+            _time = time.time
+            # This will also wait for the parent to be ready
             s = _recv(sock, packet_size)
-            if first_time is None:
-                first_time = _time()
-            n += 1
-        end_time = _time()
+            remote_addr = eval(s.partition('#')[0])
+
+            with start_cond:
+                start_time = _time()
+                started = True
+                start_cond.notify(nthreads)
+
+            n = 0
+            first_time = None
+            while not end_event and BW_END not in s:
+                _sendto(sock, s, remote_addr)
+                s = _recv(sock, packet_size)
+                if first_time is None:
+                    first_time = _time()
+                n += 1
+            end_time = _time()
 
     end_event.append(None)
     for t in threads:
diff --git a/Tools/demo/life.py b/Tools/demo/life.py
index dfb9ab8..fc4cb49 100755
--- a/Tools/demo/life.py
+++ b/Tools/demo/life.py
@@ -46,38 +46,38 @@ class LifeBoard:
         self.state = {}
         self.scr = scr
         Y, X = self.scr.getmaxyx()
-        self.X, self.Y = X-2, Y-2-1
+        self.X, self.Y = X - 2, Y - 2 - 1
         self.char = char
         self.scr.clear()
 
         # Draw a border around the board
-        border_line = '+'+(self.X*'-')+'+'
+        border_line = '+' + (self.X * '-') + '+'
         self.scr.addstr(0, 0, border_line)
-        self.scr.addstr(self.Y+1, 0, border_line)
+        self.scr.addstr(self.Y + 1, 0, border_line)
         for y in range(0, self.Y):
-            self.scr.addstr(1+y, 0, '|')
-            self.scr.addstr(1+y, self.X+1, '|')
+            self.scr.addstr(1 + y, 0, '|')
+            self.scr.addstr(1 + y, self.X + 1, '|')
         self.scr.refresh()
 
     def set(self, y, x):
         """Set a cell to the live state"""
-        if x<0 or self.X<=x or y<0 or self.Y<=y:
-            raise ValueError("Coordinates out of range %i,%i"% (y, x))
-        self.state[x,y] = 1
+        if x < 0 or self.X <= x or y < 0 or self.Y <= y:
+            raise ValueError("Coordinates out of range %i,%i" % (y, x))
+        self.state[x, y] = 1
 
     def toggle(self, y, x):
         """Toggle a cell's state between live and dead"""
         if x < 0 or self.X <= x or y < 0 or self.Y <= y:
-            raise ValueError("Coordinates out of range %i,%i"% (y, x))
+            raise ValueError("Coordinates out of range %i,%i" % (y, x))
         if (x, y) in self.state:
             del self.state[x, y]
-            self.scr.addch(y+1, x+1, ' ')
+            self.scr.addch(y + 1, x + 1, ' ')
         else:
             self.state[x, y] = 1
             if curses.has_colors():
                 # Let's pick a random color!
                 self.scr.attrset(curses.color_pair(random.randrange(1, 7)))
-            self.scr.addch(y+1, x+1, self.char)
+            self.scr.addch(y + 1, x + 1, self.char)
             self.scr.attrset(0)
         self.scr.refresh()
 
@@ -88,43 +88,46 @@ class LifeBoard:
 
     def display(self, update_board=True):
         """Display the whole board, optionally computing one generation"""
-        M,N = self.X, self.Y
+        M, N = self.X, self.Y
         if not update_board:
             for i in range(0, M):
                 for j in range(0, N):
-                    if (i,j) in self.state:
-                        self.scr.addch(j+1, i+1, self.char)
+                    if (i, j) in self.state:
+                        self.scr.addch(j + 1, i + 1, self.char)
                     else:
-                        self.scr.addch(j+1, i+1, ' ')
+                        self.scr.addch(j + 1, i + 1, ' ')
             self.scr.refresh()
             return
 
         d = {}
         self.boring = 1
         for i in range(0, M):
-            L = range( max(0, i-1), min(M, i+2) )
+            L = range(max(0, i - 1), min(M, i + 2))
             for j in range(0, N):
                 s = 0
-                live = (i,j) in self.state
-                for k in range( max(0, j-1), min(N, j+2) ):
+                live = (i, j) in self.state
+                for k in range(max(0, j - 1), min(N, j + 2)):
                     for l in L:
-                        if (l,k) in self.state:
+                        if (l, k) in self.state:
                             s += 1
                 s -= live
                 if s == 3:
                     # Birth
-                    d[i,j] = 1
+                    d[i, j] = 1
                     if curses.has_colors():
                         # Let's pick a random color!
                         self.scr.attrset(curses.color_pair(
                             random.randrange(1, 7)))
-                    self.scr.addch(j+1, i+1, self.char)
+                    self.scr.addch(j + 1, i + 1, self.char)
                     self.scr.attrset(0)
-                    if not live: self.boring = 0
-                elif s == 2 and live: d[i,j] = 1       # Survival
+                    if not live:
+                        self.boring = 0
+                elif s == 2 and live:
+                    # Survival
+                    d[i, j] = 1
                 elif live:
                     # Death
-                    self.scr.addch(j+1, i+1, ' ')
+                    self.scr.addch(j + 1, i + 1, ' ')
                     self.boring = 0
         self.state = d
         self.scr.refresh()
@@ -135,16 +138,17 @@ class LifeBoard:
         for i in range(0, self.X):
             for j in range(0, self.Y):
                 if random.random() > 0.5:
-                    self.set(j,i)
+                    self.set(j, i)
 
 
 def erase_menu(stdscr, menu_y):
     "Clear the space where the menu resides"
     stdscr.move(menu_y, 0)
     stdscr.clrtoeol()
-    stdscr.move(menu_y+1, 0)
+    stdscr.move(menu_y + 1, 0)
     stdscr.clrtoeol()
 
+
 def display_menu(stdscr, menu_y):
     "Display the menu of possible keystroke commands"
     erase_menu(stdscr, menu_y)
@@ -154,15 +158,16 @@ def display_menu(stdscr, menu_y):
     stdscr.attrset(curses.color_pair(1))
     stdscr.addstr(menu_y, 4,
         'Use the cursor keys to move, and space or Enter to toggle a cell.')
-    stdscr.addstr(menu_y+1, 4,
+    stdscr.addstr(menu_y + 1, 4,
         'E)rase the board, R)andom fill, S)tep once or C)ontinuously, Q)uit')
     stdscr.attrset(0)
 
+
 def keyloop(stdscr):
     # Clear the screen and display the menu of keys
     stdscr.clear()
     stdscr_y, stdscr_x = stdscr.getmaxyx()
-    menu_y = (stdscr_y-3)-1
+    menu_y = (stdscr_y - 3) - 1
     display_menu(stdscr, menu_y)
 
     # If color, then initialize the color pairs
@@ -179,16 +184,16 @@ def keyloop(stdscr):
     curses.mousemask(curses.BUTTON1_CLICKED)
 
     # Allocate a subwindow for the Life board and create the board object
-    subwin = stdscr.subwin(stdscr_y-3, stdscr_x, 0, 0)
+    subwin = stdscr.subwin(stdscr_y - 3, stdscr_x, 0, 0)
     board = LifeBoard(subwin, char=ord('*'))
     board.display(update_board=False)
 
     # xpos, ypos are the cursor's position
-    xpos, ypos = board.X//2, board.Y//2
+    xpos, ypos = board.X // 2, board.Y // 2
 
     # Main loop:
     while True:
-        stdscr.move(1+ypos, 1+xpos)     # Move the cursor
+        stdscr.move(1 + ypos, 1 + xpos)  # Move the cursor
         c = stdscr.getch()               # Get a keystroke
         if 0 < c < 256:
             c = chr(c)
@@ -224,15 +229,21 @@ def keyloop(stdscr):
             board.display(update_board=False)
         elif c in 'Ss':
             board.display()
-        else: pass                # Ignore incorrect keys
-        elif c == curses.KEY_UP and ypos > 0: ypos -= 1
-        elif c == curses.KEY_DOWN and ypos < board.Y-1: ypos += 1
-        elif c == curses.KEY_LEFT and xpos > 0: xpos -= 1
-        elif c == curses.KEY_RIGHT and xpos < board.X-1: xpos += 1
+        else:
+            # Ignore incorrect keys
+            pass
+        elif c == curses.KEY_UP and ypos > 0:
+            ypos -= 1
+        elif c == curses.KEY_DOWN and ypos + 1 < board.Y:
+            ypos += 1
+        elif c == curses.KEY_LEFT and xpos > 0:
+            xpos -= 1
+        elif c == curses.KEY_RIGHT and xpos + 1 < board.X:
+            xpos += 1
         elif c == curses.KEY_MOUSE:
             mouse_id, mouse_x, mouse_y, mouse_z, button_state = curses.getmouse()
-            if (mouse_x > 0 and mouse_x < board.X+1 and
-                mouse_y > 0 and mouse_y < board.Y+1):
+            if (mouse_x > 0 and mouse_x < board.X + 1 and
+                    mouse_y > 0 and mouse_y < board.Y + 1):
                 xpos = mouse_x - 1
                 ypos = mouse_y - 1
                 board.toggle(ypos, xpos)
@@ -245,7 +256,7 @@ def keyloop(stdscr):
 
 def main(stdscr):
-    keyloop(stdscr)                    # Enter the main loop
+    keyloop(stdscr)  # Enter the main loop
 
 if __name__ == '__main__':
     curses.wrapper(main)
diff --git a/Tools/demo/ss1.py b/Tools/demo/ss1.py
index 4cea667..71c9be8 100755
--- a/Tools/demo/ss1.py
+++ b/Tools/demo/ss1.py
@@ -812,7 +812,6 @@ class SheetGUI:
 
 def test_basic():
     "Basic non-gui self-test."
-    import os
     a = Sheet()
     for x in range(1, 11):
         for y in range(1, 11):
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index 8bbbb10..368a7d5 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -49,6 +49,11 @@ import sys
 _type_char_ptr = gdb.lookup_type('char').pointer() # char*
 _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
 _type_void_ptr = gdb.lookup_type('void').pointer() # void*
+_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer()
+_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer()
+
+# value computed later, see PyUnicodeObjectPtr.proxy()
+_is_pep393 = None
 
 SIZEOF_VOID_P = _type_void_ptr.sizeof
 
@@ -322,7 +327,6 @@ class PyObjectPtr(object):
         name_map = {'bool': PyBoolObjectPtr,
                     'classobj': PyClassObjectPtr,
-                    'instance': PyInstanceObjectPtr,
                     'NoneType': PyNoneStructPtr,
                     'frame': PyFrameObjectPtr,
                     'set' : PySetObjectPtr,
@@ -396,7 +400,7 @@ class ProxyAlreadyVisited(object):
 
 def _write_instance_repr(out, visited, name, pyop_attrdict, address):
-    '''Shared code for use by old-style and new-style classes:
+    '''Shared code for use by all classes:
     write a representation to file-like object "out"'''
     out.write('<')
     out.write(name)
@@ -479,7 +483,7 @@ class HeapTypeObjectPtr(PyObjectPtr):
 
     def proxyval(self, visited):
         '''
-        Support for new-style classes.
+        Support for classes.
 
         Currently we just locate the dictionary using a transliteration to
         python of _PyObject_GetDictPtr, ignoring descriptors
@@ -496,7 +500,7 @@ class HeapTypeObjectPtr(PyObjectPtr):
             attr_dict = {}
         tp_name = self.safe_tp_name()
 
-        # New-style class:
+        # Class:
         return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
 
     def write_repr(self, out, visited):
@@ -630,9 +634,14 @@ class PyDictObjectPtr(PyObjectPtr):
         Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()
         '''
-        for i in safe_range(self.field('ma_mask') + 1):
-            ep = self.field('ma_table') + i
-            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
+        keys = self.field('ma_keys')
+        values = self.field('ma_values')
+        for i in safe_range(keys['dk_size']):
+            ep = keys['dk_entries'].address + i
+            if long(values):
+                pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
+            else:
+                pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
             if not pyop_value.is_null():
                 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                 yield (pyop_key, pyop_value)
@@ -668,44 +677,6 @@ class PyDictObjectPtr(PyObjectPtr):
                 pyop_value.write_repr(out, visited)
         out.write('}')
 
-class PyInstanceObjectPtr(PyObjectPtr):
-    _typename = 'PyInstanceObject'
-
-    def proxyval(self, visited):
-        # Guard against infinite loops:
-        if self.as_address() in visited:
-            return ProxyAlreadyVisited('<...>')
-        visited.add(self.as_address())
-
-        # Get name of class:
-        in_class = self.pyop_field('in_class')
-        cl_name = in_class.pyop_field('cl_name').proxyval(visited)
-
-        # Get dictionary of instance attributes:
-        in_dict = self.pyop_field('in_dict').proxyval(visited)
-
-        # Old-style class:
-        return InstanceProxy(cl_name, in_dict, long(self._gdbval))
-
-    def write_repr(self, out, visited):
-        # Guard against infinite loops:
-        if self.as_address() in visited:
-            out.write('<...>')
-            return
-        visited.add(self.as_address())
-
-        # Old-style class:
-
-        # Get name of class:
-        in_class = self.pyop_field('in_class')
-        cl_name = in_class.pyop_field('cl_name').proxyval(visited)
-
-        # Get dictionary of instance attributes:
-        pyop_in_dict = self.pyop_field('in_dict')
-
-        _write_instance_repr(out, visited,
-                             cl_name, pyop_in_dict, self.as_address())
-
 class PyListObjectPtr(PyObjectPtr):
     _typename = 'PyListObject'
 
@@ -1123,15 +1094,46 @@ class PyUnicodeObjectPtr(PyObjectPtr):
         return _type_Py_UNICODE.sizeof
 
     def proxyval(self, visited):
-        # From unicodeobject.h:
-        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
-        #     Py_UNICODE *str;    /* Raw Unicode buffer */
-        field_length = long(self.field('length'))
-        field_str = self.field('str')
+        global _is_pep393
+        if _is_pep393 is None:
+            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
+            _is_pep393 = 'data' in [f.name for f in fields]
+        if _is_pep393:
+            # Python 3.3 and newer
+            may_have_surrogates = False
+            compact = self.field('_base')
+            ascii = compact['_base']
+            state = ascii['state']
+            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
+            if not int(state['ready']):
+                # string is not ready
+                field_length = long(compact['wstr_length'])
+                may_have_surrogates = True
+                field_str = ascii['wstr']
+            else:
+                field_length = long(ascii['length'])
+                if is_compact_ascii:
+                    field_str = ascii.address + 1
+                elif int(state['compact']):
+                    field_str = compact.address + 1
+                else:
+                    field_str = self.field('data')['any']
+                repr_kind = int(state['kind'])
+                if repr_kind == 1:
+                    field_str = field_str.cast(_type_unsigned_char_ptr)
+                elif repr_kind == 2:
+                    field_str = field_str.cast(_type_unsigned_short_ptr)
+                elif repr_kind == 4:
+                    field_str = field_str.cast(_type_unsigned_int_ptr)
+        else:
+            # Python 3.2 and earlier
+            field_length = long(self.field('length'))
+            field_str = self.field('str')
+            may_have_surrogates = self.char_width() == 2
 
         # Gather a list of ints from the Py_UNICODE array; these are either
-        # UCS-2 or UCS-4 code points:
-        if self.char_width() > 2:
+        # UCS-1, UCS-2 or UCS-4 code points:
+        if not may_have_surrogates:
             Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
         else:
             # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
@@ -1330,7 +1332,7 @@ that this python file is installed to the same path as the library (or its
     /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
 """
 def register (obj):
-    if obj == None:
+    if obj is None:
         obj = gdb
 
     # Wire up the pretty-printer
@@ -1388,6 +1390,23 @@ class Frame(object):
             iter_frame = iter_frame.newer()
         return index
 
+    # We divide frames into:
+    #   - "python frames":
+    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
+    #       - "other python frames": things that are of interest from a python
+    #         POV, but aren't bytecode (e.g. GC, GIL)
+    #   - everything else
+
+    def is_python_frame(self):
+        '''Is this a PyEval_EvalFrameEx frame, or some other important
+        frame? (see is_other_python_frame for what "important" means in this
+        context)'''
+        if self.is_evalframeex():
+            return True
+        if self.is_other_python_frame():
+            return True
+        return False
+
     def is_evalframeex(self):
         '''Is this a PyEval_EvalFrameEx frame?'''
         if self._gdbframe.name() == 'PyEval_EvalFrameEx':
@@ -1404,6 +1423,49 @@ class Frame(object):
 
         return False
 
+    def is_other_python_frame(self):
+        '''Is this frame worth displaying in python backtraces?
+        Examples:
+          - waiting on the GIL
+          - garbage-collecting
+          - within a CFunction
+        If it is, return a descriptive string
+        For other frames, return False
+        '''
+        if self.is_waiting_for_gil():
+            return 'Waiting for the GIL'
+        elif self.is_gc_collect():
+            return 'Garbage-collecting'
+        else:
+            # Detect invocations of PyCFunction instances:
+            older = self.older()
+            if older and older._gdbframe.name() == 'PyCFunction_Call':
+                # Within that frame:
+                #   "func" is the local containing the PyObject* of the
+                #   PyCFunctionObject instance
+                #   "f" is the same value, but cast to (PyCFunctionObject*)
+                #   "self" is the (PyObject*) of the 'self'
+                try:
+                    # Use the prettyprinter for the func:
+                    func = older._gdbframe.read_var('func')
+                    return str(func)
+                except RuntimeError:
+                    return 'PyCFunction invocation (unable to read "func")'
+
+        # This frame isn't worth reporting:
+        return False
+
+    def is_waiting_for_gil(self):
+        '''Is this frame waiting on the GIL?'''
+        # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
+        name = self._gdbframe.name()
+        if name:
+            return name.startswith('pthread_cond_timedwait')
+
+    def is_gc_collect(self):
+        '''Is this frame "collect" within the garbage-collector?'''
+        return self._gdbframe.name() == 'collect'
+
     def get_pyop(self):
         try:
             f = self._gdbframe.read_var('f')
@@ -1433,8 +1495,22 @@ class Frame(object):
 
     @classmethod
     def get_selected_python_frame(cls):
-        '''Try to obtain the Frame for the python code in the selected frame,
-        or None'''
+        '''Try to obtain the Frame for the python-related code in the selected
+        frame, or None'''
+        frame = cls.get_selected_frame()
+
+        while frame:
+            if frame.is_python_frame():
+                return frame
+            frame = frame.older()
+
+        # Not found:
+        return None
+
+    @classmethod
+    def get_selected_bytecode_frame(cls):
+        '''Try to obtain the Frame for the python bytecode interpreter in the
+        selected GDB frame, or None'''
         frame = cls.get_selected_frame()
 
         while frame:
@@ -1458,7 +1534,11 @@ class Frame(object):
             else:
                 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
         else:
-            sys.stdout.write('#%i\n' % self.get_index())
+            info = self.is_other_python_frame()
+            if info:
+                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
+            else:
+                sys.stdout.write('#%i\n' % self.get_index())
 
     def print_traceback(self):
         if self.is_evalframeex():
@@ -1472,7 +1552,11 @@ class Frame(object):
             else:
                 sys.stdout.write('  (unable to read python frame information)\n')
         else:
-            sys.stdout.write('  (not a python frame)\n')
+            info = self.is_other_python_frame()
+            if info:
+                sys.stdout.write('  %s\n' % info)
+            else:
+                sys.stdout.write('  (not a python frame)\n')
 
 class PyList(gdb.Command):
     '''List the current Python source code, if any
@@ -1508,9 +1592,10 @@ class PyList(gdb.Command):
         if m:
             start, end = map(int, m.groups())
 
-        frame = Frame.get_selected_python_frame()
+        # py-list requires an actual PyEval_EvalFrameEx frame:
+        frame = Frame.get_selected_bytecode_frame()
         if not frame:
-            print 'Unable to locate python frame'
+            print 'Unable to locate gdb frame for python bytecode interpreter'
             return
 
         pyop = frame.get_pyop()
@@ -1562,7 +1647,7 @@ def move_in_stack(move_up):
         if not iter_frame:
             break
 
-        if iter_frame.is_evalframeex():
+        if iter_frame.is_python_frame():
             # Result:
             if iter_frame.select():
                 iter_frame.print_summary()
@@ -1616,7 +1701,7 @@ class PyBacktraceFull(gdb.Command):
     def invoke(self, args, from_tty):
         frame = Frame.get_selected_python_frame()
         while frame:
-            if frame.is_evalframeex():
+            if frame.is_python_frame():
                 frame.print_summary()
             frame = frame.older()
 
@@ -1635,7 +1720,7 @@ class PyBacktrace(gdb.Command):
         sys.stdout.write('Traceback (most recent call first):\n')
         frame = Frame.get_selected_python_frame()
         while frame:
-            if frame.is_evalframeex():
+            if frame.is_python_frame():
                 frame.print_traceback()
             frame = frame.older()
 
diff --git a/Tools/hg/hgtouch.py b/Tools/hg/hgtouch.py
new file mode 100644
index 0000000..5961a10
--- /dev/null
+++ b/Tools/hg/hgtouch.py
@@ -0,0 +1,103 @@
+"""Bring time stamps of generated checked-in files into the right order
+
+A versioned configuration file .hgtouch specifies generated files, in the
+syntax of make rules.
+
+  output:    input1 input2
+
+In addition to the dependency syntax, #-comments are supported.
+"""
+import errno
+import os
+
+def parse_config(repo):
+    try:
+        fp = repo.wfile(".hgtouch")
+    except IOError, e:
+        if e.errno != errno.ENOENT:
+            raise
+        return {}
+    result = {}
+    with fp:
+        for line in fp:
+            # strip comments
+            line = line.split('#')[0].strip()
+            if ':' not in line:
+                continue
+            outputs, inputs = line.split(':', 1)
+            outputs = outputs.split()
+            inputs = inputs.split()
+            for o in outputs:
+                try:
+                    result[o].extend(inputs)
+                except KeyError:
+                    result[o] = inputs
+    return result
+
+def check_rule(ui, repo, modified, output, inputs):
+    f_output = repo.wjoin(output)
+    try:
+        o_time = os.stat(f_output).st_mtime
+    except OSError:
+        ui.warn("Generated file %s does not exist\n" % output)
+        return False
+    need_touch = False
+    backdate = None
+    backdate_source = None
+    for i in inputs:
+        f_i = repo.wjoin(i)
+        try:
+            i_time = os.stat(f_i).st_mtime
+        except OSError:
+            ui.warn(".hgtouch input file %s does not exist\n" % i)
+            return False
+        if i in modified:
+            # input is modified. Need to backdate at least to i_time
+            if backdate is None or backdate > i_time:
+                backdate = i_time
+                backdate_source = i
+            continue
+        if o_time <= i_time:
+            # generated file is older, touch
+            need_touch = True
+    if backdate is not None:
+        ui.warn("Input %s for file %s locally modified\n" % (backdate_source, output))
+        # set to 1s before oldest modified input
+        backdate -= 1
+        os.utime(f_output, (backdate, backdate))
+        return False
+    if need_touch:
+        ui.note("Touching %s\n" % output)
+        os.utime(f_output, None)
+    return True
+
+def do_touch(ui, repo):
+    modified = repo.status()[0]
+    dependencies = parse_config(repo)
+    success = True
+    # try processing all rules in topological order
+    hold_back = {}
+    while dependencies:
+        output, inputs = dependencies.popitem()
+        # check whether any of the inputs is generated
+        for i in inputs:
+            if i in dependencies:
+                hold_back[output] = inputs
+                continue
+        success = check_rule(ui, repo, modified, output, inputs)
+        # put back held back rules
+        dependencies.update(hold_back)
+        hold_back = {}
+    if hold_back:
+        ui.warn("Cyclic dependency involving %s\n" % (' '.join(hold_back.keys())))
+        return False
+    return success
+
+def touch(ui, repo):
+    "touch generated files that are older than their sources after an update."
+    do_touch(ui, repo)
+
+cmdtable = {
+    "touch": (touch, [],
+              "touch generated files according to the .hgtouch configuration")
+}
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 67a960f..93b2b79 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -163,7 +163,6 @@ import time
 import getopt
 import token
 import tokenize
-import operator
 
 __version__ = '1.5'
 
diff --git a/Tools/importbench/README b/Tools/importbench/README
new file mode 100644
index 0000000..81a5544
--- /dev/null
+++ b/Tools/importbench/README
@@ -0,0 +1,6 @@
+Importbench is a set of micro-benchmarks for various import scenarios.
+
+It should not be used as an overall benchmark of import performance, but rather
+as an easy way to measure the impact of possible code changes. For a real-world
+benchmark of import, use the normal_startup benchmark from
+hg.python.org/benchmarks.
diff --git a/Tools/importbench/importbench.py b/Tools/importbench/importbench.py
new file mode 100644
index 0000000..714c0e4
--- /dev/null
+++ b/Tools/importbench/importbench.py
@@ -0,0 +1,252 @@
+"""Benchmark some basic import use-cases.
+
+The assumption is made that this benchmark is run in a fresh interpreter and
+thus has no external changes made to import-related attributes in sys.
+
+"""
+from test.test_importlib import util
+from test.test_importlib.source import util as source_util
+import decimal
+import imp
+import importlib
+import importlib.machinery
+import json
+import os
+import py_compile
+import sys
+import tabnanny
+import timeit
+
+
+def bench(name, cleanup=lambda: None, *, seconds=1, repeat=3):
+    """Bench the given statement as many times as necessary until total
+    executions take one second."""
+    stmt = "__import__({!r})".format(name)
+    timer = timeit.Timer(stmt)
+    for x in range(repeat):
+        total_time = 0
+        count = 0
+        while total_time < seconds:
+            try:
+                total_time += timer.timeit(1)
+            finally:
+                cleanup()
+                count += 1
+        else:
+            # One execution too far
+            if total_time > seconds:
+                count -= 1
+        yield count // seconds
+
+def from_cache(seconds, repeat):
+    """sys.modules"""
+    name = '<benchmark import>'
+    module = imp.new_module(name)
+    module.__file__ = '<test>'
+    module.__package__ = ''
+    with util.uncache(name):
+        sys.modules[name] = module
+        for result in bench(name, repeat=repeat, seconds=seconds):
+            yield result
+
+
+def builtin_mod(seconds, repeat):
+    """Built-in module"""
+    name = 'errno'
+    if name in sys.modules:
+        del sys.modules[name]
+    # Relying on built-in importer being implicit.
+    for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
+                        seconds=seconds):
+        yield result
+
+
+def source_wo_bytecode(seconds, repeat):
+    """Source w/o bytecode: small"""
+    sys.dont_write_bytecode = True
+    try:
+        name = '__importlib_test_benchmark__'
+        # Clears out sys.modules and puts an entry at the front of sys.path.
+        with source_util.create_modules(name) as mapping:
+            assert not os.path.exists(imp.cache_from_source(mapping[name]))
+            sys.meta_path.append(importlib.machinery.PathFinder)
+            loader = (importlib.machinery.SourceFileLoader,
+                      importlib.machinery.SOURCE_SUFFIXES, True)
+            sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader))
+            for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
+                                seconds=seconds):
+                yield result
+    finally:
+        sys.dont_write_bytecode = False
+
+
+def _wo_bytecode(module):
+    name = module.__name__
+    def benchmark_wo_bytecode(seconds, repeat):
+        """Source w/o bytecode: {}"""
+        bytecode_path = imp.cache_from_source(module.__file__)
+        if os.path.exists(bytecode_path):
+            os.unlink(bytecode_path)
+        sys.dont_write_bytecode = True
+        try:
+            for result in bench(name, lambda: sys.modules.pop(name),
+                                repeat=repeat, seconds=seconds):
+                yield result
+        finally:
+            sys.dont_write_bytecode = False
+
+    benchmark_wo_bytecode.__doc__ = benchmark_wo_bytecode.__doc__.format(name)
+    return benchmark_wo_bytecode
+
+tabnanny_wo_bytecode = _wo_bytecode(tabnanny)
+decimal_wo_bytecode = _wo_bytecode(decimal)
+
+
+def source_writing_bytecode(seconds, repeat):
+    """Source writing bytecode: small"""
+    assert not sys.dont_write_bytecode
+    name = '__importlib_test_benchmark__'
+    with source_util.create_modules(name) as mapping:
+        sys.meta_path.append(importlib.machinery.PathFinder)
+        loader = (importlib.machinery.SourceFileLoader,
+                  importlib.machinery.SOURCE_SUFFIXES, True)
+        sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader))
+        def cleanup():
+            sys.modules.pop(name)
+            os.unlink(imp.cache_from_source(mapping[name]))
+        for result in bench(name, cleanup, repeat=repeat, seconds=seconds):
+            assert not os.path.exists(imp.cache_from_source(mapping[name]))
+            yield result
+
+
+def _writing_bytecode(module):
+    name = module.__name__
+    def writing_bytecode_benchmark(seconds, repeat):
+        """Source writing bytecode: {}"""
+        assert not sys.dont_write_bytecode
+        def cleanup():
+            sys.modules.pop(name)
+            os.unlink(imp.cache_from_source(module.__file__))
+        for result in bench(name, cleanup, repeat=repeat, seconds=seconds):
+            yield result
+
+    writing_bytecode_benchmark.__doc__ = (
+        writing_bytecode_benchmark.__doc__.format(name))
+    return writing_bytecode_benchmark
+
+tabnanny_writing_bytecode = _writing_bytecode(tabnanny)
+decimal_writing_bytecode = _writing_bytecode(decimal)
+
+
+def source_using_bytecode(seconds, repeat):
+    """Source w/ bytecode: small"""
+    name = '__importlib_test_benchmark__'
+    with source_util.create_modules(name) as mapping:
+        sys.meta_path.append(importlib.machinery.PathFinder)
+        loader = (importlib.machinery.SourceFileLoader,
+                  importlib.machinery.SOURCE_SUFFIXES, True)
+        sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader))
+        py_compile.compile(mapping[name])
+        assert os.path.exists(imp.cache_from_source(mapping[name]))
+        for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
+                            seconds=seconds):
+            yield result
+
+
+def _using_bytecode(module):
+    name = module.__name__
+    def using_bytecode_benchmark(seconds, repeat):
+        """Source w/ bytecode: {}"""
+        py_compile.compile(module.__file__)
+        for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
+                            seconds=seconds):
+            yield result
+
+    using_bytecode_benchmark.__doc__ = (
+        using_bytecode_benchmark.__doc__.format(name))
+    return using_bytecode_benchmark
+
+tabnanny_using_bytecode = _using_bytecode(tabnanny)
+decimal_using_bytecode = _using_bytecode(decimal)
+
+
+def main(import_, options):
+    if options.source_file:
+        with options.source_file:
+            prev_results = json.load(options.source_file)
+    else:
+        prev_results = {}
+    __builtins__.__import__ = import_
+    benchmarks = (from_cache, builtin_mod,
+                  source_writing_bytecode,
+                  source_wo_bytecode, source_using_bytecode,
+                  tabnanny_writing_bytecode,
+                  tabnanny_wo_bytecode, tabnanny_using_bytecode,
+                  decimal_writing_bytecode,
+                  decimal_wo_bytecode, decimal_using_bytecode,
+                 )
+    if options.benchmark:
+        for b in benchmarks:
+            if b.__doc__ == options.benchmark:
+                benchmarks = [b]
+                break
+        else:
+            print('Unknown benchmark: {!r}'.format(options.benchmark,
+                  file=sys.stderr))
+            sys.exit(1)
+    seconds = 1
+    seconds_plural = 's' if seconds > 1 else ''
+    repeat = 3
+    header = ('Measuring imports/second over {} second{}, best out of {}\n'
+              'Entire benchmark run should take about {} seconds\n'
+              'Using {!r} as __import__\n')
+    print(header.format(seconds, seconds_plural, repeat,
+                        len(benchmarks) * seconds * repeat, __import__))
+    new_results = {}
+    for benchmark in benchmarks:
+        print(benchmark.__doc__, "[", end=' ')
+        sys.stdout.flush()
+        results = []
+        for result in benchmark(seconds=seconds, repeat=repeat):
+            results.append(result)
+            print(result, end=' ')
+            sys.stdout.flush()
+        assert not sys.dont_write_bytecode
+        print("]", "best is", format(max(results), ',d'))
+        new_results[benchmark.__doc__] = results
+    if prev_results:
+        print('\n\nComparing new vs. old\n')
+        for benchmark in benchmarks:
+            benchmark_name = benchmark.__doc__
+            old_result = max(prev_results[benchmark_name])
+            new_result = max(new_results[benchmark_name])
+            result = '{:,d} vs. {:,d} ({:%})'.format(new_result,
+                                                     old_result,
+                                                     new_result/old_result)
+            print(benchmark_name, ':', result)
+    if options.dest_file:
+        with options.dest_file:
+            json.dump(new_results, options.dest_file, indent=2)
+
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-b', '--builtin', dest='builtin', action='store_true',
+                        default=False, help="use the built-in __import__")
+    parser.add_argument('-r', '--read', dest='source_file',
+                        type=argparse.FileType('r'),
+                        help='file to read benchmark data from to compare '
+                             'against')
+    parser.add_argument('-w', '--write', dest='dest_file',
+                        type=argparse.FileType('w'),
+                        help='file to write benchmark data to')
+    parser.add_argument('--benchmark', dest='benchmark',
+                        help='specific benchmark to run')
+    options = parser.parse_args()
+    import_ = __import__
+    if not options.builtin:
+        import_ = importlib.__import__
+
+    main(import_, options)
diff --git a/Tools/iobench/iobench.py b/Tools/iobench/iobench.py
index 5ec6f17..530bc79 100644
--- a/Tools/iobench/iobench.py
+++ b/Tools/iobench/iobench.py
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-
 # This file should be kept compatible with both Python 2.6 and Python >= 3.0.
 
-import time
+import itertools
 import os
+import platform
 import re
 import sys
-import hashlib
-import functools
-import itertools
+import time
 from optparse import OptionParser
 
 out = sys.stdout
@@ -307,6 +306,16 @@ def run_all_tests(options):
         "large": 2,
     }
 
+    print("Python %s" % sys.version)
+    if sys.version_info < (3, 3):
+        if sys.maxunicode > 0xffff:
+            text = "UCS-4 (wide build)"
+        else:
+            text = "UTF-16 (narrow build)"
+    else:
+        text = "PEP 393"
+    print("Unicode: %s" % text)
+    print(platform.platform())
     binary_files = list(get_binary_files())
     text_files = list(get_text_files())
     if "b" in options:
diff --git a/Tools/msi/msi.py b/Tools/msi/msi.py
index 508816d..2ec6951 100644
--- a/Tools/msi/msi.py
+++ b/Tools/msi/msi.py
@@ -2,12 +2,11 @@
 # (C) 2003 Martin v. Loewis
 # See "FOO" in comments refers to MSDN sections with the title FOO.
 import msilib, schema, sequence, os, glob, time, re, shutil, zipfile
+import subprocess, tempfile
 from msilib import Feature, CAB, Directory, Dialog, Binary, add_data
 import uisample
 from win32com.client import constants
 from distutils.spawn import find_executable
-from uuids import product_codes
-import tempfile
 
 # Settings can be overridden in config.py below
 # 0 for official python.org releases
@@ -28,7 +27,7 @@ have_tcl = True
 # path to PCbuild directory
 PCBUILD="PCbuild"
 # msvcrt version
-MSVCR = "90"
+MSVCR = "100"
 # Name of certificate in default store to sign MSI with
 certname = None
 # Make a zip file containing the PDB files for this build?
@@ -77,19 +76,16 @@ upgrade_code_64='{6A965A0C-6EE6-4E3A-9983-3263F56311EC}'
 
 if snapshot:
     current_version = "%s.%s.%s" % (major, minor, int(time.time()/3600/24))
-    product_code = msilib.gen_uuid()
-else:
-    product_code = product_codes[current_version]
 
 if full_current_version is None:
     full_current_version = current_version
 
 extensions = [
-    'bz2.pyd',
     'pyexpat.pyd',
     'select.pyd',
     'unicodedata.pyd',
     'winsound.pyd',
+    '_bz2.pyd',
+    '_elementtree.pyd',
     '_socket.pyd',
     '_ssl.pyd',
@@ -100,7 +96,10 @@ extensions = [
     '_ctypes_test.pyd',
     '_sqlite3.pyd',
     '_hashlib.pyd',
-    '_multiprocessing.pyd'
+    '_multiprocessing.pyd',
+    '_lzma.pyd',
+    '_decimal.pyd',
+    '_testbuffer.pyd'
 ]
 
 # Well-known component UUIDs
@@ -119,12 +118,11 @@ pythondll_uuid = {
     "30":"{6953bc3b-6768-4291-8410-7914ce6e2ca8}",
     "31":"{4afcba0b-13e4-47c3-bebe-477428b46913}",
     "32":"{3ff95315-1096-4d31-bd86-601d5438ad5e}",
+    "33":"{f7581ca4-d368-4eea-8f82-d48c64c4f047}",
     } [major+minor]
 
 # Compute the name that Sphinx gives to the docfile
-docfile = ""
-if int(micro):
-    docfile = micro
+docfile = micro
 if level < 0xf:
     if level == 0xC:
         docfile += "rc%s" % (serial,)
@@ -185,12 +183,19 @@ dll_path = os.path.join(srcdir, PCBUILD, dll_file)
 msilib.set_arch_from_file(dll_path)
 if msilib.pe_type(dll_path) != msilib.pe_type("msisupport.dll"):
     raise SystemError("msisupport.dll for incorrect architecture")
+
 if msilib.Win64:
     upgrade_code = upgrade_code_64
-    # Bump the last digit of the code by one, so that 32-bit and 64-bit
-    # releases get separate product codes
-    digit = hex((int(product_code[-2],16)+1)%16)[-1]
-    product_code = product_code[:-2] + digit + '}'
+
+if snapshot:
+    product_code = msilib.gen_uuid()
+else:
+    # official release: generate UUID from the download link that the file will have
+    import uuid
+    product_code = uuid.uuid3(uuid.NAMESPACE_URL,
+                    'http://www.python.org/ftp/python/%s.%s.%s/python-%s%s.msi' %
+                    (major, minor, micro, full_current_version, msilib.arch_ext))
+    product_code = '{%s}' % product_code
 
 if testpackage:
     ext = 'px'
@@ -281,7 +286,7 @@ def remove_old_versions(db):
                               None, migrate_features, None, "REMOVEOLDSNAPSHOT")])
     props = "REMOVEOLDSNAPSHOT;REMOVEOLDVERSION"
 
-    props += ";TARGETDIR;DLLDIR"
+    props += ";TARGETDIR;DLLDIR;LAUNCHERDIR"
     # Installer collects the product codes of the earlier releases in
     # these properties. In order to allow modification of the properties,
    # they must be declared as secure. See "SecureCustomProperties Property"
@@ -410,7 +415,7 @@ def add_ui(db):
              ("VerdanaRed9", "Verdana", 9, 255, 0),
             ])
 
-    compileargs = r'-Wi "[TARGETDIR]Lib\compileall.py" -f -x "bad_coding|badsyntax|site-packages|py2_|lib2to3\\tests" "[TARGETDIR]Lib"'
+    compileargs = r'-Wi "[TARGETDIR]Lib\compileall.py" -f -x "bad_coding|badsyntax|site-packages|py2_|lib2to3\\tests|venv\\scripts" "[TARGETDIR]Lib"'
     lib2to3args = r'-c "import lib2to3.pygram, lib2to3.patcomp;lib2to3.patcomp.PatternCompiler()"'
     # See "CustomAction Table"
     add_data(db, "CustomAction", [
@@ -421,6 +426,8 @@ def add_ui(db):
          "[WindowsVolume]Python%s%s" % (major, minor)),
         ("SetDLLDirToTarget", 307, "DLLDIR", "[TARGETDIR]"),
         ("SetDLLDirToSystem32", 307, "DLLDIR", SystemFolderName),
+        ("SetLauncherDirToTarget", 307, "LAUNCHERDIR", "[TARGETDIR]"),
+        ("SetLauncherDirToWindows", 307, "LAUNCHERDIR", "[WindowsFolder]"),
         # msidbCustomActionTypeExe + msidbCustomActionTypeSourceFile
         # See "Custom Action Type 18"
         ("CompilePyc", 18, "python.exe", compileargs),
@@ -437,6 +444,8 @@ def add_ui(db):
         # In the user interface, assume all-users installation if privileged.
         ("SetDLLDirToSystem32", 'DLLDIR="" and ' + sys32cond, 751),
         ("SetDLLDirToTarget", 'DLLDIR="" and not ' + sys32cond, 752),
+        ("SetLauncherDirToWindows", 'LAUNCHERDIR="" and ' + sys32cond, 753),
+        ("SetLauncherDirToTarget", 'LAUNCHERDIR="" and not ' + sys32cond, 754),
         ("SelectDirectoryDlg", "Not Installed", 1230),
         # XXX no support for resume installations yet
         #("ResumeDlg", "Installed AND (RESUME OR Preselected)", 1240),
@@ -445,13 +454,20 @@ def add_ui(db):
     add_data(db, "AdminUISequence",
              [("InitialTargetDir", 'TARGETDIR=""', 750),
               ("SetDLLDirToTarget", 'DLLDIR=""', 751),
+              ("SetLauncherDirToTarget", 'LAUNCHERDIR=""', 752),
              ])
 
+    # Prepend TARGETDIR to the system path, and remove it on uninstall.
+    add_data(db, "Environment",
+             [("PathAddition", "=-*Path", "[TARGETDIR];[~]", "REGISTRY.path")])
+
     # Execute Sequences
     add_data(db, "InstallExecuteSequence",
             [("InitialTargetDir", 'TARGETDIR=""', 750),
              ("SetDLLDirToSystem32", 'DLLDIR="" and ' + sys32cond, 751),
              ("SetDLLDirToTarget", 'DLLDIR="" and not ' + sys32cond, 752),
+             ("SetLauncherDirToWindows", 'LAUNCHERDIR="" and ' + sys32cond, 753),
+             ("SetLauncherDirToTarget", 'LAUNCHERDIR="" and not ' + sys32cond, 754),
              ("UpdateEditIDLE", None, 1050),
              ("CompilePyc", "COMPILEALL", 6800),
              ("CompilePyo", "COMPILEALL", 6801),
@@ -460,6 +476,7 @@ def add_ui(db):
     add_data(db, "AdminExecuteSequence",
             [("InitialTargetDir", 'TARGETDIR=""', 750),
              ("SetDLLDirToTarget", 'DLLDIR=""', 751),
+             ("SetLauncherDirToTarget", 'LAUNCHERDIR=""', 752),
              ("CompilePyc", "COMPILEALL", 6800),
              ("CompilePyo", "COMPILEALL", 6801),
              ("CompileGrammar", "COMPILEALL", 6802),
@@ -670,11 +687,11 @@ def add_ui(db):
         c=features.xbutton("Advanced", "Advanced", None, 0.30)
         c.event("SpawnDialog", "AdvancedDlg")
 
-        c=features.text("ItemDescription", 140, 180, 210, 30, 3,
+        c=features.text("ItemDescription", 140, 180, 210, 40, 3,
                    "Multiline description of the currently selected item.")
         c.mapping("SelectionDescription","Text")
 
-        c=features.text("ItemSize", 140, 210, 210, 45, 3,
+        c=features.text("ItemSize", 140, 225, 210, 33, 3,
                         "The size of the currently selected item.")
         c.mapping("SelectionSize", "Text")
 
@@ -828,7 +845,7 @@ def add_features(db):
    # (i.e. additional Python libraries) need to follow the parent feature.
    # Features that have no advertisement trigger (e.g. the test suite)
    # must not support advertisement
-    global default_feature, tcltk, htmlfiles, tools, testsuite, ext_feature, private_crt
+    global default_feature, tcltk, htmlfiles, tools, testsuite, ext_feature, private_crt, prepend_path
     default_feature = Feature(db, "DefaultFeature", "Python",
                               "Python Interpreter and Libraries",
                               1, directory = "TARGETDIR")
@@ -848,32 +865,38 @@ def add_features(db):
     htmlfiles = Feature(db, "Documentation", "Documentation",
                         "Python HTMLHelp File", 7, parent = default_feature)
     tools = Feature(db, "Tools", "Utility Scripts",
-                    "Python utility scripts (Tools/", 9,
+                    "Python utility scripts (Tools/)", 9,
                     parent = default_feature, attributes=2)
     testsuite = Feature(db, "Testsuite", "Test suite",
                         "Python test suite (Lib/test/)", 11,
                         parent = default_feature, attributes=2|8)
-
-def extract_msvcr90():
+    # prepend_path is an additional feature which is to be off by default.
+    # Since the default level for the above features is 1, this needs to be
+    # at least one level higher.
+    prepend_path = Feature(db, "PrependPath", "Add python.exe to Path",
+                           "Prepend [TARGETDIR] to the system Path variable. "
+                           "This allows you to type 'python' into a command "
+                           "prompt without needing the full path.", 13,
+                           parent = default_feature, attributes=2|8,
+                           level=2)
+
+def extract_msvcr100():
     # Find the redistributable files
     if msilib.Win64:
-        arch = "amd64"
+        arch = "x64"
     else:
         arch = "x86"
-    dir = os.path.join(os.environ['VS90COMNTOOLS'], r"..\..\VC\redist\%s\Microsoft.VC90.CRT" % arch)
+    dir = os.path.join(os.environ['VS100COMNTOOLS'], r"..\..\VC\redist\%s\Microsoft.VC100.CRT" % arch)
 
     result = []
     installer = msilib.MakeInstaller()
-    # omit msvcm90 and msvcp90, as they aren't really needed
-    files = ["Microsoft.VC90.CRT.manifest", "msvcr90.dll"]
-    for f in files:
-        path = os.path.join(dir, f)
-        kw = {'src':path}
-        if f.endswith('.dll'):
-            kw['version'] = installer.FileVersion(path, 0)
-            kw['language'] = installer.FileVersion(path, 1)
-        result.append((f, kw))
-    return result
+    # At least for VS2010, manifests are no longer provided
+    name = "msvcr100.dll"
+    path = os.path.join(dir, name)
+    kw = {'src':path}
+    kw['version'] = installer.FileVersion(path, 0)
+    kw['language'] = installer.FileVersion(path, 1)
+    return name, kw
 
 def generate_license():
     import shutil, glob
@@ -889,7 +912,7 @@ def generate_license():
         dirs = glob.glob(srcdir+"/../"+pat)
         if not dirs:
             raise ValueError, "Could not find "+srcdir+"/../"+pat
-        if len(dirs) > 2:
+        if len(dirs) > 2 and not snapshot:
            raise ValueError, "Multiple copies of "+pat
         dir = dirs[0]
         shutil.copyfileobj(open(os.path.join(dir, file)), out)
@@ -904,16 +927,28 @@ class PyDirectory(Directory):
             kw['componentflags'] = 2 #msidbComponentAttributesOptional
         Directory.__init__(self, *args, **kw)
 
-    def check_unpackaged(self):
-        self.unpackaged_files.discard('__pycache__')
-        self.unpackaged_files.discard('.svn')
-        if self.unpackaged_files:
-            print "Warning: Unpackaged files in %s" % self.absolute
-            print self.unpackaged_files
+def hgmanifest():
+    # Fetch file list from Mercurial
+    process = subprocess.Popen(['hg', 'manifest'], stdout=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    # Create nested directories for file tree
+    result = {}
+    for line in stdout.splitlines():
+        components = line.split('/')
+        d = result
+        while len(components) > 1:
+            d1 = d.setdefault(components[0], {})
+            d = d1
+            del components[0]
+        d[components[0]] = None
+    return result
+
 
 # See "File Table", "Component Table", "Directory Table",
 #     "FeatureComponents Table"
 def add_files(db):
+    installer = msilib.MakeInstaller()
+    hgfiles = hgmanifest()
     cab = CAB("python")
     tmpfiles = []
     # Add all executables, icons, text files into the TARGETDIR component
@@ -932,11 +967,32 @@ def add_files(db):
     # msidbComponentAttributesSharedDllRefCount = 8, see "Component Table"
     dlldir = PyDirectory(db, cab, root, srcdir, "DLLDIR", ".")
+    launcherdir = PyDirectory(db, cab, root, srcdir, "LAUNCHERDIR", ".")
+
+    # msidbComponentAttributes64bit = 256; this disables registry redirection
+    # to allow setting the SharedDLLs key in the 64-bit portion even for a
+    # 32-bit installer.
+    # XXX does this still allow to install the component on a 32-bit system?
+    # Pick up 32-bit binary always
+    launchersrc = PCBUILD
+    if launchersrc.lower() == 'pcbuild\\x64-pgo':
+        launchersrc = 'PCBuild\\win32-pgo'
+    if launchersrc.lower() == 'pcbuild\\amd64':
+        launchersrc = 'PCBuild'
+    launcher = os.path.join(srcdir, launchersrc, "py.exe")
+    launcherdir.start_component("launcher", flags = 8+256, keyfile="py.exe")
+    launcherdir.add_file(launcher,
+                         version=installer.FileVersion(launcher, 0),
+                         language=installer.FileVersion(launcher, 1))
+    launcherw = os.path.join(srcdir, launchersrc, "pyw.exe")
+    launcherdir.start_component("launcherw", flags = 8+256, keyfile="pyw.exe")
+    launcherdir.add_file(launcherw,
+                         version=installer.FileVersion(launcherw, 0),
+                         language=installer.FileVersion(launcherw, 1))
 
     pydll = "python%s%s.dll" % (major, minor)
     pydllsrc = os.path.join(srcdir, PCBUILD, pydll)
     dlldir.start_component("DLLDIR", flags = 8, keyfile = pydll, uuid = pythondll_uuid)
-    installer = msilib.MakeInstaller()
     pyversion = installer.FileVersion(pydllsrc, 0)
     if not snapshot:
         # For releases, the Python DLL has the same version as the
@@ -952,9 +1008,8 @@ def add_files(db):
     # pointing to the root directory
     root.start_component("msvcr90", feature=private_crt)
     # Results are ID,keyword pairs
-    manifest, crtdll = extract_msvcr90()
-    root.add_file(manifest[0], **manifest[1])
-    root.add_file(crtdll[0], **crtdll[1])
+    crtdll, kwds = extract_msvcr100()
+    root.add_file(crtdll, **kwds)
     # Copy the manifest
     # Actually, don't do that anymore - no DLL in DLLs should have a manifest
     # dependency on msvcr90.dll anymore, so this should not be necessary
@@ -975,104 +1030,40 @@ def add_files(db):
 
     # Add all .py files in Lib, except tkinter, test
     dirs = []
-    pydirs = [(root,"Lib")]
+    pydirs = [(root, "Lib", hgfiles["Lib"], default_feature)]
     while pydirs:
         # Commit every now and then, or else installer will complain
         db.Commit()
-        parent, dir = pydirs.pop()
-        if dir == ".svn" or dir == '__pycache__' or dir.startswith("plat-"):
+        parent, dir, files, feature = pydirs.pop()
+        if dir.startswith("plat-"):
             continue
-        elif dir in ["tkinter", "idlelib", "Icons"]:
+        if dir in ["tkinter", "idlelib", "turtledemo"]:
             if not have_tcl:
                 continue
+            feature = tcltk
             tcltk.set_current()
-        elif dir in ['test', 'tests', 'data', 'output']:
-            # test: Lib, Lib/email, Lib/ctypes, Lib/sqlite3
-            # tests: Lib/distutils
-            # data: Lib/email/test
-            # output: Lib/test
-            testsuite.set_current()
+        elif dir in ('test', 'tests'):
+            feature = testsuite
         elif not have_ctypes and dir == "ctypes":
             continue
-        else:
-            default_feature.set_current()
+        feature.set_current()
         lib = PyDirectory(db, cab, parent, dir, dir, "%s|%s" % (parent.make_short(dir), dir))
-        # Add additional files
         dirs.append(lib)
-        lib.glob("*.txt")
-        if dir=='site-packages':
-            lib.add_file("README.txt", src="README")
-            continue
-        files = lib.glob("*.py")
-        files += lib.glob("*.pyw")
-        if files:
-            # Add an entry to the RemoveFile table to remove bytecode files.
-            lib.remove_pyc()
-        # package READMEs if present
-        lib.glob("README")
-        if dir=='Lib':
-            lib.add_file('wsgiref.egg-info')
-        if dir=='test' and parent.physical=='Lib':
-            lib.add_file("185test.db")
-            lib.add_file("audiotest.au")
-            lib.add_file("sgml_input.html")
-            lib.add_file("testtar.tar")
-            lib.add_file("test_difflib_expect.html")
-            lib.add_file("check_soundcard.vbs")
-            lib.add_file("empty.vbs")
-            lib.add_file("Sine-1000Hz-300ms.aif")
-            lib.add_file("mime.types")
-            lib.glob("*.uue")
-            lib.glob("*.pem")
-            lib.glob("*.pck")
-            lib.glob("cfgparser.*")
-            lib.add_file("zip_cp437_header.zip")
-            lib.add_file("zipdir.zip")
-        if dir=='capath':
-            lib.glob("*.0")
-        if dir=='tests' and parent.physical=='distutils':
-            lib.add_file("Setup.sample")
-        if dir=='decimaltestdata':
-            lib.glob("*.decTest")
-        if dir=='xmltestdata':
-            lib.glob("*.xml")
-            lib.add_file("test.xml.out")
-        if dir=='output':
-            lib.glob("test_*")
-        if dir=='sndhdrdata':
-            lib.glob("sndhdr.*")
-        if dir=='idlelib':
-            lib.glob("*.def")
-            lib.add_file("idle.bat")
-            lib.add_file("ChangeLog")
-        if dir=="Icons":
-            lib.glob("*.gif")
-            lib.add_file("idle.icns")
-        if dir=="command" and parent.physical=="distutils":
-            lib.glob("wininst*.exe")
-            lib.add_file("command_template")
-        if dir=="lib2to3":
-            lib.removefile("pickle", "*.pickle")
-        if dir=="macholib":
-            lib.add_file("README.ctypes")
-            lib.glob("fetch_macholib*")
-        if dir=='turtledemo':
-            lib.add_file("turtle.cfg")
-        if dir=="pydoc_data":
-            lib.add_file("_pydoc.css")
-        if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email":
-            # This should contain all non-.svn files listed in subversion
-            for f in os.listdir(lib.absolute):
-                if f.endswith(".txt") or f==".svn":continue
-                if f.endswith(".au") or f.endswith(".gif"):
-                    lib.add_file(f)
+        has_py = False
+        for name, subdir in files.items():
+            if subdir is None:
+                assert os.path.isfile(os.path.join(lib.absolute, name))
+                if name == 'README':
+                    lib.add_file("README.txt", src="README")
                 else:
-                    print("WARNING: New file %s in email/test/data" % f)
-        for f in os.listdir(lib.absolute):
-            if os.path.isdir(os.path.join(lib.absolute, f)):
-                pydirs.append((lib, f))
-    for d in dirs:
-        d.check_unpackaged()
+                    lib.add_file(name)
+                    has_py = has_py or name.endswith(".py") or name.endswith(".pyw")
+            else:
+                assert os.path.isdir(os.path.join(lib.absolute, name))
+                pydirs.append((lib, name, subdir, feature))
+
+        if has_py:
+            lib.remove_pyc()
 
     # Add DLLs
     default_feature.set_current()
     lib = DLLs
@@ -1159,6 +1150,8 @@ def add_files(db):
             lib.add_file("README.txt", src="README")
         if f == 'Scripts':
             lib.add_file("2to3.py", src="2to3")
+            lib.add_file("pydoc3.py", src="pydoc3")
+            lib.add_file("pyvenv.py", src="pyvenv")
         if have_tcl:
             lib.start_component("pydocgui.pyw", tcltk, keyfile="pydocgui.pyw")
             lib.add_file("pydocgui.pyw")
@@ -1190,6 +1183,8 @@ def add_registry(db):
                   "InstallPath"),
                  ("REGISTRY.doc", msilib.gen_uuid(), "TARGETDIR", registry_component, None,
                   "Documentation"),
+                 ("REGISTRY.path", msilib.gen_uuid(), "TARGETDIR", registry_component, None,
+                  None),
                  ("REGISTRY.def", msilib.gen_uuid(), "TARGETDIR", registry_component,
                   None, None)] + tcldata)
     # See "FeatureComponents Table".
@@ -1206,6 +1201,7 @@ def add_registry(db): add_data(db, "FeatureComponents", [(default_feature.id, "REGISTRY"), (htmlfiles.id, "REGISTRY.doc"), + (prepend_path.id, "REGISTRY.path"), (ext_feature.id, "REGISTRY.def")] + tcldata ) @@ -1244,11 +1240,11 @@ def add_registry(db): "text/plain", "REGISTRY.def"), #Verbs ("py.open", -1, pat % (testprefix, "", "open"), "", - r'"[TARGETDIR]python.exe" "%1" %*', "REGISTRY.def"), + r'"[LAUNCHERDIR]py.exe" "%1" %*', "REGISTRY.def"), ("pyw.open", -1, pat % (testprefix, "NoCon", "open"), "", - r'"[TARGETDIR]pythonw.exe" "%1" %*', "REGISTRY.def"), + r'"[LAUNCHERDIR]pyw.exe" "%1" %*', "REGISTRY.def"), ("pyc.open", -1, pat % (testprefix, "Compiled", "open"), "", - r'"[TARGETDIR]python.exe" "%1" %*', "REGISTRY.def"), + r'"[LAUNCHERDIR]py.exe" "%1" %*', "REGISTRY.def"), ] + tcl_verbs + [ #Icons ("py.icon", -1, pat2 % (testprefix, ""), "", @@ -1347,9 +1343,9 @@ finally: # Merge CRT into MSI file. This requires the database to be closed. mod_dir = os.path.join(os.environ["ProgramFiles"], "Common Files", "Merge Modules") if msilib.Win64: - modules = ["Microsoft_VC90_CRT_x86_x64.msm", "policy_9_0_Microsoft_VC90_CRT_x86_x64.msm"] + modules = ["Microsoft_VC100_CRT_x64.msm"] else: - modules = ["Microsoft_VC90_CRT_x86.msm","policy_9_0_Microsoft_VC90_CRT_x86.msm"] + modules = ["Microsoft_VC100_CRT_x86.msm"] for i, n in enumerate(modules): modules[i] = os.path.join(mod_dir, n) diff --git a/Tools/msi/msilib.py b/Tools/msi/msilib.py index 5795d0e..472d9d4 100644 --- a/Tools/msi/msilib.py +++ b/Tools/msi/msilib.py @@ -408,7 +408,7 @@ class Directory: self.physical = physical self.logical = logical self.component = None - self.short_names = sets.Set() + self.short_names = {} self.ids = sets.Set() self.keyfiles = {} self.componentflags = componentflags @@ -456,23 +456,25 @@ class Directory: [(feature.id, component)]) def make_short(self, file): + long = file file = re.sub(r'[\?|><:/*"+,;=\[\]]', '_', file) # restrictions on short names - parts = file.split(".") + parts = file.split(".", 1) if len(parts)>1: - suffix = parts[-1].upper() + suffix = parts[1].upper() else: - suffix = None + suffix = '' prefix = parts[0].upper() - if len(prefix) <= 8 and (not suffix or len(suffix)<=3): + if len(prefix) <= 8 and '.' not in suffix and len(suffix) <= 3: if suffix: file = prefix+"."+suffix else: file = prefix - assert file not in self.short_names + assert file not in self.short_names, (file, self.short_names[file]) else: prefix = prefix[:6] if suffix: - suffix = suffix[:3] + # last three characters of last suffix + suffix = suffix.rsplit('.')[-1][:3] pos = 1 while 1: if suffix: @@ -484,7 +486,7 @@ class Directory: assert pos < 10000 if pos in (10, 100, 1000): prefix = prefix[:-1] - self.short_names.add(file) + self.short_names[file] = long return file def add_file(self, file, src=None, version=None, language=None): diff --git a/Tools/msi/uuids.py b/Tools/msi/uuids.py deleted file mode 100644 index 80d17ad..0000000 --- a/Tools/msi/uuids.py +++ /dev/null @@ -1,99 +0,0 @@ -# This should be extended for each Python release. -# The product code must change whenever the name of the MSI file -# changes, and when new component codes are issued for existing -# components. See "Changing the Product Code". As we change the -# component codes with every build, we need a new product code -# each time. For intermediate (snapshot) releases, they are automatically -# generated. For official releases, we record the product codes, -# so people can refer to them. 
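(Returning to the make_short() rewrite above: short names follow the MSI 8.3 rules, the new code keeps only the last dot-suffix when truncating, and it now remembers the long name so a duplicate assertion can report both offenders. A self-contained sketch of the scheme, with the collision branch assumed to use the usual NAME~N counter as in the elided loop body:

def make_short_sketch(name, short_names):
    long_name = name
    prefix, _, suffix = name.upper().partition(".")
    if len(prefix) <= 8 and "." not in suffix and len(suffix) <= 3:
        short = prefix + "." + suffix if suffix else prefix
        assert short not in short_names, (short, short_names[short])
    else:
        prefix = prefix[:6]
        suffix = suffix.rsplit(".")[-1][:3]   # last three chars of the last suffix
        pos = 1
        while True:
            short = "%s~%d.%s" % (prefix, pos, suffix) if suffix else "%s~%d" % (prefix, pos)
            if short not in short_names:
                break
            pos += 1
    short_names[short] = long_name            # makes duplicates diagnosable
    return short

# e.g. make_short_sketch("kill_python_d.exe", {}) -> 'KILL_P~1.EXE'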
-product_codes = { - '2.5.101': '{bc14ce3e-5e72-4a64-ac1f-bf59a571898c}', # 2.5a1 - '2.5.102': '{5eed51c1-8e9d-4071-94c5-b40de5d49ba5}', # 2.5a2 - '2.5.103': '{73dcd966-ffec-415f-bb39-8342c1f47017}', # 2.5a3 - '2.5.111': '{c797ecf8-a8e6-4fec-bb99-526b65f28626}', # 2.5b1 - '2.5.112': '{32beb774-f625-439d-b587-7187487baf15}', # 2.5b2 - '2.5.113': '{89f23918-11cf-4f08-be13-b9b2e6463fd9}', # 2.5b3 - '2.5.121': '{8e9321bc-6b24-48a3-8fd4-c95f8e531e5f}', # 2.5c1 - '2.5.122': '{a6cd508d-9599-45da-a441-cbffa9f7e070}', # 2.5c2 - '2.5.150': '{0a2c5854-557e-48c8-835a-3b9f074bdcaa}', # 2.5.0 - '2.5.1121':'{0378b43e-6184-4c2f-be1a-4a367781cd54}', # 2.5.1c1 - '2.5.1150':'{31800004-6386-4999-a519-518f2d78d8f0}', # 2.5.1 - '2.5.2150':'{6304a7da-1132-4e91-a343-a296269eab8a}', # 2.5.2c1 - '2.5.2150':'{6b976adf-8ae8-434e-b282-a06c7f624d2f}', # 2.5.2 - '2.6.101': '{0ba82e1b-52fd-4e03-8610-a6c76238e8a8}', # 2.6a1 - '2.6.102': '{3b27e16c-56db-4570-a2d3-e9a26180c60b}', # 2.6a2 - '2.6.103': '{cd06a9c5-bde5-4bd7-9874-48933997122a}', # 2.6a3 - '2.6.104': '{dc6ed634-474a-4a50-a547-8de4b7491e53}', # 2.6a4 - '2.6.111': '{3f82079a-5bee-4c4a-8a41-8292389e24ae}', # 2.6b1 - '2.6.112': '{8a0e5970-f3e6-4737-9a2b-bc5ff0f15fb5}', # 2.6b2 - '2.6.113': '{df4f5c21-6fcc-4540-95de-85feba634e76}', # 2.6b3 - '2.6.121': '{bbd34464-ddeb-4028-99e5-f16c4a8fbdb3}', # 2.6c1 - '2.6.122': '{8f64787e-a023-4c60-bfee-25d3a3f592c6}', # 2.6c2 - '2.6.150': '{110eb5c4-e995-4cfb-ab80-a5f315bea9e8}', # 2.6.0 - '2.6.1150':'{9cc89170-000b-457d-91f1-53691f85b223}', # 2.6.1 - '2.6.2121':'{adac412b-b209-4c15-b6ab-dca1b6e47144}', # 2.6.2c1 - '2.6.2150':'{24aab420-4e30-4496-9739-3e216f3de6ae}', # 2.6.2 - '2.6.3121':'{a73e0254-dcda-4fe4-bf37-c7e1c4f4ebb6}', # 2.6.3c1 - '2.6.3150':'{3d9ac095-e115-4e94-bdef-7f7edf17697d}', # 2.6.3 - '2.6.4121':'{727de605-0359-4606-a94b-c2033652379b}', # 2.6.4c1 - '2.6.4122':'{4f7603c6-6352-4299-a398-150a31b19acc}', # 2.6.4c2 - '2.6.4150':'{e7394a0f-3f80-45b1-87fc-abcd51893246}', # 2.6.4 - '2.6.5121':'{e0e273d7-7598-4701-8325-c90c069fd5ff}', # 2.6.5c1 - '2.6.5122':'{fa227b76-0671-4dc6-b826-c2ff2a70dfd5}', # 2.6.5c2 - '2.6.5150':'{4723f199-fa64-4233-8e6e-9fccc95a18ee}', # 2.6.5 - '2.7.101': '{eca1bbef-432c-49ae-a667-c213cc7bbf22}', # 2.7a1 - '2.7.102': '{21ce16ed-73c4-460d-9b11-522f417b2090}', # 2.7a2 - '2.7.103': '{6e7dbd55-ba4a-48ac-a688-6c75db4d7500}', # 2.7a3 - '2.7.104': '{ee774ba3-74a5-48d9-b425-b35a287260c8}', # 2.7a4 - '2.7.111': '{9cfd9ec7-a9c7-4980-a1c6-054fc6493eb3}', # 2.7b1 - '2.7.112': '{9a72faf6-c304-4165-8595-9291ff30cac6}', # 2.7b2 - '2.7.121': '{f530c94a-dd53-4de9-948e-b632b9cb48d2}', # 2.7c1 - '2.7.122': '{f80905d2-dd8d-4b8e-8a40-c23c93dca07d}', # 2.7c2 - '2.7.150': '{20c31435-2a0a-4580-be8b-ac06fc243ca4}', # 2.7.0 - '3.0.101': '{8554263a-3242-4857-9359-aa87bc2c58c2}', # 3.0a1 - '3.0.102': '{692d6e2c-f0ac-40b8-a133-7191aeeb67f9}', # 3.0a2 - '3.0.103': '{49cb2995-751a-4753-be7a-d0b1bb585e06}', # 3.0a3 - '3.0.104': '{87cb019e-19fd-4238-b1c7-85751437d646}', # 3.0a4 - '3.0.105': '{cf2659af-19ec-43d2-8c35-0f6a09439d42}', # 3.0a5 - '3.0.111': '{36c26f55-837d-45cf-848c-5f5c0fb47a28}', # 3.0b1 - '3.0.112': '{056a0fbc-c8fe-4c61-aade-c4411b70c998}', # 3.0b2 - '3.0.113': '{2b2e89a9-83af-43f9-b7d5-96e80c5a3f26}', # 3.0b3 - '3.0.114': '{e95c31af-69be-4dd7-96e6-e5fc85e660e6}', # 3.0b4 - '3.0.121': '{d0979c5e-cd3c-42ec-be4c-e294da793573}', # 3.0c1 - '3.0.122': '{f707b8e9-a257-4045-818e-4923fc20fbb6}', # 3.0c2 - '3.0.123': '{5e7208f1-8643-4ea2-ab5e-4644887112e3}', # 3.0c3 - '3.0.150': '{e0e56e21-55de-4f77-a109-1baa72348743}', 
# 3.0.0 - '3.0.1121':'{d35b1ea5-3d70-4872-bf7e-cd066a77a9c9}', # 3.0.1c1 - '3.0.1150':'{de2f2d9c-53e2-40ee-8209-74da63cb060e}', # 3.0.1 - '3.0.2121':'{cef79e7f-9809-49e2-afd2-e24148d7c855}', # 3.0.2c1 - '3.0.2150':'{0cf3b95a-8382-4607-9779-c36407ff362c}', # 3.0.2 - '3.1.101': '{c423eada-c498-4d51-9eb4-bfeae647e0a0}', # 3.1a1 - '3.1.102': '{f6e199bf-dc64-42f3-87d4-1525991a013e}', # 3.1a2 - '3.1.111': '{c3c82893-69b2-4676-8554-1b6ee6c191e9}', # 3.1b1 - '3.1.121': '{da2b5170-12f3-4d99-8a1f-54926cca7acd}', # 3.1c1 - '3.1.122': '{bceb5133-e2ee-4109-951f-ac7e941a1692}', # 3.1c2 - '3.1.150': '{3ad61ee5-81d2-4d7e-adef-da1dd37277d1}', # 3.1.0 - '3.1.1121':'{5782f957-6d49-41d4-bad0-668715dfd638}', # 3.1.1c1 - '3.1.1150':'{7ff90460-89b7-435b-b583-b37b2815ccc7}', # 3.1.1 - '3.1.2121':'{ec45624a-378c-43be-91f3-3f7a59b0d90c}', # 3.1.2c1 - '3.1.2150':'{d40af016-506c-43fb-a738-bd54fa8c1e85}', # 3.1.2 - '3.2.101' :'{b411f168-7a36-4fff-902c-a554d1c78a4f}', # 3.2a1 - '3.2.102' :'{79ff73b7-8359-410f-b9c5-152d2026f8c8}', # 3.2a2 - '3.2.103' :'{e7635c65-c221-4b9b-b70a-5611b8369d77}', # 3.2a3 - '3.2.104' :'{748cd139-75b8-4ca8-98a7-58262298181e}', # 3.2a4 - '3.2.111' :'{20bfc16f-c7cd-4fc0-8f96-9914614a3c50}', # 3.2b1 - '3.2.112' :'{0e350c98-8d73-4993-b686-cfe87160046e}', # 3.2b2 - '3.2.121' :'{2094968d-7583-47f6-a7fd-22304532e09f}', # 3.2rc1 - '3.2.122' :'{4f3edfa6-cf70-469a-825f-e1206aa7f412}', # 3.2rc2 - '3.2.123' :'{90c673d7-8cfd-4969-9816-f7d70bad87f3}', # 3.2rc3 - '3.2.150' :'{b2042d5e-986d-44ec-aee3-afe4108ccc93}', # 3.2.0 - '3.2.1121':'{4f90de4a-83dd-4443-b625-ca130ff361dd}', # 3.2.1rc1 - '3.2.1122':'{dc5eb04d-ff8a-4bed-8f96-23942fd59e5f}', # 3.2.1rc2 - '3.2.1150':'{34b2530c-6349-4292-9dc3-60bda4aed93c}', # 3.2.1 - '3.2.2121':'{DFB29A53-ACC4-44e6-85A6-D0DA26FE8E4E}', # 3.2.2rc1 - '3.2.2150':'{4CDE3168-D060-4b7c-BC74-4D8F9BB01AFD}', # 3.2.2 - '3.2.3121':'{B8E8CFF7-E4C6-4a7c-9F06-BB3A8B75DDA8}', # 3.2.3rc1 - '3.2.3122':'{E8DCD3E0-12B6-4fb7-9DB5-543C2E67372E}', # 3.2.3rc2 - '3.2.3150':'{789C9644-9F82-44d3-B4CA-AC31F46F5882}', # 3.2.3 - -} diff --git a/Tools/parser/test_unparse.py b/Tools/parser/test_unparse.py index 2ac1ea6..be84400 100644 --- a/Tools/parser/test_unparse.py +++ b/Tools/parser/test_unparse.py @@ -2,9 +2,10 @@ import unittest import test.support import io import os +import random import tokenize -import ast import unparse +import ast def read_pyfile(filename): """Read and return the contents of a Python source file (as a @@ -93,6 +94,21 @@ finally: suite5 """ +with_simple = """\ +with f(): + suite1 +""" + +with_as = """\ +with f() as x: + suite1 +""" + +with_two_items = """\ +with f() as x, g() as y: + suite1 +""" + class ASTTestCase(unittest.TestCase): def assertASTEqual(self, ast1, ast2): self.assertEqual(ast.dump(ast1), ast.dump(ast2)) @@ -215,6 +231,15 @@ class UnparseTestCase(ASTTestCase): self.check_roundtrip("a, *b[0], c = seq") self.check_roundtrip("a, *(b, c) = seq") + def test_with_simple(self): + self.check_roundtrip(with_simple) + + def test_with_as(self): + self.check_roundtrip(with_as) + + def test_with_two_items(self): + self.check_roundtrip(with_two_items) + class DirectoryTestCase(ASTTestCase): """Test roundtrip behaviour on all files in Lib and Lib/test.""" @@ -233,6 +258,10 @@ class DirectoryTestCase(ASTTestCase): if n.endswith('.py') and not n.startswith('bad'): names.append(os.path.join(test_dir, n)) + # Test limited subset of files unless the 'cpu' resource is specified. 
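# (For reference, the invariant check_roundtrip() asserts, in miniature;
# this assumes Tools/parser is on sys.path so the unparse module from
# this patch is importable:)
import ast, io
import unparse

source = "with f() as x, g() as y:\n    suite1\n"
tree = ast.parse(source)
buf = io.StringIO()
unparse.Unparser(tree, buf)       # regenerate source text from the AST
assert ast.dump(ast.parse(buf.getvalue())) == ast.dump(tree)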
+ if not test.support.is_resource_enabled("cpu"): + names = random.sample(names, 10) + for filename in names: if test.support.verbose: print('Testing %s' % filename) diff --git a/Tools/parser/unparse.py b/Tools/parser/unparse.py index d9fca97..b55e2c6 100644 --- a/Tools/parser/unparse.py +++ b/Tools/parser/unparse.py @@ -1,6 +1,5 @@ "Usage: unparse.py <path to source file>" import sys -import math import ast import tokenize import io @@ -147,6 +146,14 @@ class Unparser: self.dispatch(t.value) self.write(")") + def _YieldFrom(self, t): + self.write("(") + self.write("yield from") + if t.value: + self.write(" ") + self.dispatch(t.value) + self.write(")") + def _Raise(self, t): self.fill("raise") if not t.exc: @@ -158,12 +165,11 @@ class Unparser: self.write(" from ") self.dispatch(t.cause) - def _TryExcept(self, t): + def _Try(self, t): self.fill("try") self.enter() self.dispatch(t.body) self.leave() - for ex in t.handlers: self.dispatch(ex) if t.orelse: @@ -171,22 +177,12 @@ class Unparser: self.enter() self.dispatch(t.orelse) self.leave() - - def _TryFinally(self, t): - if len(t.body) == 1 and isinstance(t.body[0], ast.TryExcept): - # try-except-finally - self.dispatch(t.body) - else: - self.fill("try") + if t.finalbody: + self.fill("finally") self.enter() - self.dispatch(t.body) + self.dispatch(t.finalbody) self.leave() - self.fill("finally") - self.enter() - self.dispatch(t.finalbody) - self.leave() - def _ExceptHandler(self, t): self.fill("except") if t.type: @@ -296,10 +292,7 @@ class Unparser: def _With(self, t): self.fill("with ") - self.dispatch(t.context_expr) - if t.optional_vars: - self.write(" as ") - self.dispatch(t.optional_vars) + interleave(lambda: self.write(", "), self.dispatch, t.items) self.enter() self.dispatch(t.body) self.leave() @@ -564,6 +557,12 @@ class Unparser: if t.asname: self.write(" as "+t.asname) + def _withitem(self, t): + self.dispatch(t.context_expr) + if t.optional_vars: + self.write(" as ") + self.dispatch(t.optional_vars) + def roundtrip(filename, output=sys.stdout): with open(filename, "rb") as pyfile: encoding = tokenize.detect_encoding(pyfile.readline)[0] diff --git a/Tools/pybench/pybench.py b/Tools/pybench/pybench.py index 8eaad63..942f56d 100755 --- a/Tools/pybench/pybench.py +++ b/Tools/pybench/pybench.py @@ -35,7 +35,9 @@ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE ! 
""" -import sys, time, operator, platform +import sys +import time +import platform from CommandLine import * try: @@ -73,11 +75,15 @@ ALLOW_SKIPPING_CALIBRATION = 1 # Timer types TIMER_TIME_TIME = 'time.time' +TIMER_TIME_PROCESS_TIME = 'time.process_time' +TIMER_TIME_PERF_COUNTER = 'time.perf_counter' TIMER_TIME_CLOCK = 'time.clock' TIMER_SYSTIMES_PROCESSTIME = 'systimes.processtime' # Choose platform default timer -if sys.platform[:3] == 'win': +if hasattr(time, 'perf_counter'): + TIMER_PLATFORM_DEFAULT = TIMER_TIME_PERF_COUNTER +elif sys.platform[:3] == 'win': # On WinXP this has 2.5ms resolution TIMER_PLATFORM_DEFAULT = TIMER_TIME_CLOCK else: @@ -93,6 +99,10 @@ def get_timer(timertype): if timertype == TIMER_TIME_TIME: return time.time + elif timertype == TIMER_TIME_PROCESS_TIME: + return time.process_time + elif timertype == TIMER_TIME_PERF_COUNTER: + return time.perf_counter elif timertype == TIMER_TIME_CLOCK: return time.clock elif timertype == TIMER_SYSTIMES_PROCESSTIME: @@ -107,6 +117,7 @@ def get_machine_details(): print('Getting machine details...') buildno, builddate = platform.python_build() python = platform.python_version() + # XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+? if sys.maxunicode == 65535: # UCS2 build (standard) unitype = 'UCS2' @@ -865,7 +876,18 @@ python pybench.py -s p25.pybench -c p21.pybench print('* using timer: systimes.processtime (%s)' % \ systimes.SYSTIMES_IMPLEMENTATION) else: + # Check that the clock function does exist + try: + get_timer(timer) + except TypeError: + print("* Error: Unknown timer: %s" % timer) + return + print('* using timer: %s' % timer) + if hasattr(time, 'get_clock_info'): + info = time.get_clock_info(timer[5:]) + print('* timer: resolution=%s, implementation=%s' + % (info.resolution, info.implementation)) print() @@ -943,8 +965,6 @@ python pybench.py -s p25.pybench -c p21.pybench pickle.dump(bench,f) f.close() except IOError as reason: - print('* Error opening/writing reportfile') - except IOError as reason: print('* Error opening/writing reportfile %s: %s' % ( reportfile, reason)) diff --git a/Tools/scripts/README b/Tools/scripts/README index 8c02529..d65d1fd 100644 --- a/Tools/scripts/README +++ b/Tools/scripts/README @@ -15,7 +15,7 @@ db2pickle.py Dump a database file to a pickle diff.py Print file diffs in context, unified, or ndiff formats dutree.py Format du(1) output as a tree sorted by size eptags.py Create Emacs TAGS file for Python modules -find_recursionlimit.py Find the maximum recursion limit on this machine +find_recursionlimit.py Find the maximum recursion limit on this machine finddiv.py A grep-like tool that looks for division operators findlinksto.py Recursively find symbolic links to a given path prefix findnocoding.py Find source files which need an encoding declaration @@ -28,6 +28,7 @@ ftpmirror.py FTP mirror script google.py Open a webbrowser with Google gprof2html.py Transform gprof(1) output into useful HTML h2py.py Translate #define's into Python assignments +highlight.py Python syntax highlighting with HTML output idle3 Main program to start IDLE ifdef.py Remove #if(n)def groups from C sources lfcr.py Change LF line endings to CRLF (Unix to Windows) @@ -53,6 +54,7 @@ redemo.py Basic regular expression demonstration facility reindent.py Change .py files to use 4-space indents reindent-rst.py Fix-up reStructuredText file whitespace rgrep.py Reverse grep through a file (useful for big logfiles) +run_tests.py Run the test suite with more sensible default options serve.py Small 
wsgiref-based web server, used in make serve in Doc suff.py Sort a list of files by suffix svneol.py Set svn:eol-style on all files in directory diff --git a/Tools/scripts/diff.py b/Tools/scripts/diff.py index 9efb078..f9b14bf 100755 --- a/Tools/scripts/diff.py +++ b/Tools/scripts/diff.py @@ -9,6 +9,12 @@ """ import sys, os, time, difflib, optparse +from datetime import datetime, timezone + +def file_mtime(path): + t = datetime.fromtimestamp(os.stat(path).st_mtime, + timezone.utc) + return t.astimezone().isoformat() def main(): @@ -30,10 +36,12 @@ def main(): n = options.lines fromfile, tofile = args - fromdate = time.ctime(os.stat(fromfile).st_mtime) - todate = time.ctime(os.stat(tofile).st_mtime) - fromlines = open(fromfile, 'U').readlines() - tolines = open(tofile, 'U').readlines() + fromdate = file_mtime(fromfile) + todate = file_mtime(tofile) + with open(fromfile, 'U') as ff: + fromlines = ff.readlines() + with open(tofile, 'U') as tf: + tolines = tf.readlines() if options.u: diff = difflib.unified_diff(fromlines, tolines, fromfile, tofile, fromdate, todate, n=n) diff --git a/Tools/scripts/find_recursionlimit.py b/Tools/scripts/find_recursionlimit.py index 7a86603..1171146 100755 --- a/Tools/scripts/find_recursionlimit.py +++ b/Tools/scripts/find_recursionlimit.py @@ -89,6 +89,12 @@ def test_cpickle(_cache={}): _pickle.Pickler(io.BytesIO(), protocol=-1).dump(l) _cache[n] = l +def test_compiler_recursion(): + # The compiler uses a scaling factor to support additional levels + # of recursion. This is a sanity check of that scaling to ensure + # it still raises RuntimeError even at higher recursion limits + compile("()" * (10 * sys.getrecursionlimit()), "<single>", "single") + def check_limit(n, test_func_name): sys.setrecursionlimit(n) if test_func_name.startswith("test_"): @@ -117,5 +123,6 @@ if __name__ == '__main__': check_limit(limit, "test_getattr") check_limit(limit, "test_getitem") check_limit(limit, "test_cpickle") + check_limit(limit, "test_compiler_recursion") print("Limit of %d is fine" % limit) limit = limit + 100 diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py index a494a48..b3e9dc7 100755 --- a/Tools/scripts/findnocoding.py +++ b/Tools/scripts/findnocoding.py @@ -2,7 +2,7 @@ """List all those Python files that require a coding directive -Usage: nocoding.py dir1 [dir2...] +Usage: findnocoding.py dir1 [dir2...] 
""" __author__ = "Oleg Broytmann, Georg Brandl" @@ -32,7 +32,7 @@ except ImportError: "no sophisticated Python source file search will be done.", file=sys.stderr) -decl_re = re.compile(r"coding[=:]\s*([-\w.]+)") +decl_re = re.compile(rb"coding[=:]\s*([-\w.]+)") def get_declaration(line): match = decl_re.search(line) @@ -50,21 +50,20 @@ def has_correct_encoding(text, codec): def needs_declaration(fullpath): try: - infile = open(fullpath, 'rU') + infile = open(fullpath, 'rb') except IOError: # Oops, the file was removed - ignore it return None - line1 = infile.readline() - line2 = infile.readline() + with infile: + line1 = infile.readline() + line2 = infile.readline() - if get_declaration(line1) or get_declaration(line2): - # the file does have an encoding declaration, so trust it - infile.close() - return False + if get_declaration(line1) or get_declaration(line2): + # the file does have an encoding declaration, so trust it + return False - # check the whole file for non utf-8 characters - rest = infile.read() - infile.close() + # check the whole file for non utf-8 characters + rest = infile.read() if has_correct_encoding(line1+line2+rest, "utf-8"): return False diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py new file mode 100755 index 0000000..aff5cae --- /dev/null +++ b/Tools/scripts/highlight.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +'''Add syntax highlighting to Python source code''' + +__author__ = 'Raymond Hettinger' + +import keyword, tokenize, cgi, re, functools +try: + import builtins +except ImportError: + import __builtin__ as builtins + +#### Analyze Python Source ################################# + +def is_builtin(s): + 'Return True if s is the name of a builtin' + return hasattr(builtins, s) + +def combine_range(lines, start, end): + 'Join content from a range of lines between start and end' + (srow, scol), (erow, ecol) = start, end + if srow == erow: + return lines[srow-1][scol:ecol], end + rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]] + return ''.join(rows), end + +def analyze_python(source): + '''Generate and classify chunks of Python for syntax highlighting. + Yields tuples in the form: (category, categorized_text). 
+ ''' + lines = source.splitlines(True) + lines.append('') + readline = functools.partial(next, iter(lines), '') + kind = tok_str = '' + tok_type = tokenize.COMMENT + written = (1, 0) + for tok in tokenize.generate_tokens(readline): + prev_tok_type, prev_tok_str = tok_type, tok_str + tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok + kind = '' + if tok_type == tokenize.COMMENT: + kind = 'comment' + elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@': + kind = 'operator' + elif tok_type == tokenize.STRING: + kind = 'string' + if prev_tok_type == tokenize.INDENT or scol==0: + kind = 'docstring' + elif tok_type == tokenize.NAME: + if tok_str in ('def', 'class', 'import', 'from'): + kind = 'definition' + elif prev_tok_str in ('def', 'class'): + kind = 'defname' + elif keyword.iskeyword(tok_str): + kind = 'keyword' + elif is_builtin(tok_str) and prev_tok_str != '.': + kind = 'builtin' + if kind: + text, written = combine_range(lines, written, (srow, scol)) + yield '', text + text, written = tok_str, (erow, ecol) + yield kind, text + line_upto_token, written = combine_range(lines, written, (erow, ecol)) + yield '', line_upto_token + +#### Raw Output ########################################### + +def raw_highlight(classified_text): + 'Straight text display of text classifications' + result = [] + for kind, text in classified_text: + result.append('%15s: %r\n' % (kind or 'plain', text)) + return ''.join(result) + +#### ANSI Output ########################################### + +default_ansi = { + 'comment': ('\033[0;31m', '\033[0m'), + 'string': ('\033[0;32m', '\033[0m'), + 'docstring': ('\033[0;32m', '\033[0m'), + 'keyword': ('\033[0;33m', '\033[0m'), + 'builtin': ('\033[0;35m', '\033[0m'), + 'definition': ('\033[0;33m', '\033[0m'), + 'defname': ('\033[0;34m', '\033[0m'), + 'operator': ('\033[0;33m', '\033[0m'), +} + +def ansi_highlight(classified_text, colors=default_ansi): + 'Add syntax highlighting to source code using ANSI escape sequences' + # http://en.wikipedia.org/wiki/ANSI_escape_code + result = [] + for kind, text in classified_text: + opener, closer = colors.get(kind, ('', '')) + result += [opener, text, closer] + return ''.join(result) + +#### HTML Output ########################################### + +def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'): + 'Convert classified text to an HTML fragment' + result = [opener] + for kind, text in classified_text: + if kind: + result.append('<span class="%s">' % kind) + result.append(cgi.escape(text)) + if kind: + result.append('</span>') + result.append(closer) + return ''.join(result) + +default_css = { + '.comment': '{color: crimson;}', + '.string': '{color: forestgreen;}', + '.docstring': '{color: forestgreen; font-style:italic;}', + '.keyword': '{color: darkorange;}', + '.builtin': '{color: purple;}', + '.definition': '{color: darkorange; font-weight:bold;}', + '.defname': '{color: blue;}', + '.operator': '{color: brown;}', +} + +default_html = '''\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" + "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<meta http-equiv="Content-type" content="text/html;charset=UTF-8"> +<title> {title} </title> +<style type="text/css"> +{css} +</style> +</head> +<body> +{body} +</body> +</html> +''' + +def build_html_page(classified_text, title='python', + css=default_css, html=default_html): + 'Create a complete HTML page with colorized source code' + css_str = '\n'.join(['%s %s' % item for item in css.items()]) + result = 
html_highlight(classified_text) + title = cgi.escape(title) + return html.format(title=title, css=css_str, body=result) + +#### LaTeX Output ########################################## + +default_latex_commands = { + 'comment': '{\color{red}#1}', + 'string': '{\color{ForestGreen}#1}', + 'docstring': '{\emph{\color{ForestGreen}#1}}', + 'keyword': '{\color{orange}#1}', + 'builtin': '{\color{purple}#1}', + 'definition': '{\color{orange}#1}', + 'defname': '{\color{blue}#1}', + 'operator': '{\color{brown}#1}', +} + +default_latex_document = r''' +\documentclass{article} +\usepackage{alltt} +\usepackage{upquote} +\usepackage{color} +\usepackage[usenames,dvipsnames]{xcolor} +\usepackage[cm]{fullpage} +%(macros)s +\begin{document} +\center{\LARGE{%(title)s}} +\begin{alltt} +%(body)s +\end{alltt} +\end{document} +''' + +def alltt_escape(s): + 'Replace backslash and braces with their escaped equivalents' + xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'} + return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s) + +def latex_highlight(classified_text, title = 'python', + commands = default_latex_commands, + document = default_latex_document): + 'Create a complete LaTeX document with colorized source code' + macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items()) + result = [] + for kind, text in classified_text: + if kind: + result.append(r'\py%s{' % kind) + result.append(alltt_escape(text)) + if kind: + result.append('}') + return document % dict(title=title, macros=macros, body=''.join(result)) + + +if __name__ == '__main__': + import sys, argparse, webbrowser, os, textwrap + + parser = argparse.ArgumentParser( + description = 'Add syntax highlighting to Python source code', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog = textwrap.dedent(''' + examples: + + # Show syntax highlighted code in the terminal window + $ ./highlight.py myfile.py + + # Colorize myfile.py and display in a browser + $ ./highlight.py -b myfile.py + + # Create an HTML section to embed in an existing webpage + $ ./highlight.py -s myfile.py + + # Create a complete HTML file + $ ./highlight.py -c myfile.py > myfile.html + + # Create a PDF using LaTeX + $ ./highlight.py -l myfile.py | pdflatex + + ''')) + parser.add_argument('sourcefile', metavar = 'SOURCEFILE', + help = 'file containing Python sourcecode') + parser.add_argument('-b', '--browser', action = 'store_true', + help = 'launch a browser to show results') + parser.add_argument('-c', '--complete', action = 'store_true', + help = 'build a complete html webpage') + parser.add_argument('-l', '--latex', action = 'store_true', + help = 'build a LaTeX document') + parser.add_argument('-r', '--raw', action = 'store_true', + help = 'raw parse of categorized text') + parser.add_argument('-s', '--section', action = 'store_true', + help = 'show an HTML section rather than a complete webpage') + args = parser.parse_args() + + if args.section and (args.browser or args.complete): + parser.error('The -s/--section option is incompatible with ' + 'the -b/--browser or -c/--complete options') + + sourcefile = args.sourcefile + with open(sourcefile) as f: + source = f.read() + classified_text = analyze_python(source) + + if args.raw: + encoded = raw_highlight(classified_text) + elif args.complete or args.browser: + encoded = build_html_page(classified_text, title=sourcefile) + elif args.section: + encoded = html_highlight(classified_text) + elif args.latex: + encoded = latex_highlight(classified_text, title=sourcefile) + else: +
encoded = ansi_highlight(classified_text) + + if args.browser: + htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html' + with open(htmlfile, 'w') as f: + f.write(encoded) + webbrowser.open('file://' + os.path.abspath(htmlfile)) + else: + sys.stdout.write(encoded) diff --git a/Tools/scripts/import_diagnostics.py b/Tools/scripts/import_diagnostics.py new file mode 100755 index 0000000..c907221 --- /dev/null +++ b/Tools/scripts/import_diagnostics.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +"""Miscellaneous diagnostics for the import system""" + +import sys +import argparse +from pprint import pprint + +def _dump_state(args): + print(sys.version) + for name in args.attributes: + print("sys.{}:".format(name)) + pprint(getattr(sys, name)) + +def _add_dump_args(cmd): + cmd.add_argument("attributes", metavar="ATTR", nargs="+", + help="sys module attribute to display") + +COMMANDS = ( + ("dump", "Dump import state", _dump_state, _add_dump_args), +) + +def _make_parser(): + parser = argparse.ArgumentParser() + sub = parser.add_subparsers(title="Commands") + for name, description, implementation, add_args in COMMANDS: + cmd = sub.add_parser(name, help=description) + cmd.set_defaults(command=implementation) + add_args(cmd) + return parser + +def main(args): + parser = _make_parser() + args = parser.parse_args(args) + return args.command(args) + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/Tools/scripts/patchcheck.py b/Tools/scripts/patchcheck.py index 6a39145..7b8857e 100755 --- a/Tools/scripts/patchcheck.py +++ b/Tools/scripts/patchcheck.py @@ -49,29 +49,15 @@ def mq_patches_applied(): @status("Getting the list of files that have been added/changed", info=lambda x: n_files_str(len(x))) def changed_files(): - """Get the list of changed or added files from the VCS.""" - if os.path.isdir(os.path.join(SRCDIR, '.hg')): - vcs = 'hg' - cmd = 'hg status --added --modified --no-status' - if mq_patches_applied(): - cmd += ' --rev qparent' - elif os.path.isdir('.svn'): - vcs = 'svn' - cmd = 'svn status --quiet --non-interactive --ignore-externals' - else: + """Get the list of changed or added files from Mercurial.""" + if not os.path.isdir(os.path.join(SRCDIR, '.hg')): sys.exit('need a checkout to get modified files') - st = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) - try: - st.wait() - if vcs == 'hg': - return [x.decode().rstrip() for x in st.stdout] - else: - output = (x.decode().rstrip().rsplit(None, 1)[-1] - for x in st.stdout if x[0] in b'AM') - return set(path for path in output if os.path.isfile(path)) - finally: - st.stdout.close() + cmd = 'hg status --added --modified --no-status' + if mq_patches_applied(): + cmd += ' --rev qparent' + with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as st: + return [x.decode().rstrip() for x in st.stdout] def report_modified_files(file_paths): @@ -89,10 +75,8 @@ def report_modified_files(file_paths): def normalize_whitespace(file_paths): """Make sure that the whitespace for .py files have been normalized.""" reindent.makebackup = False # No need to create backups. 
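# (One detail of the changed_files() rewrite above: it relies on
# subprocess.Popen being usable as a context manager, available since
# Python 3.2, which waits for the child and closes its pipes on exit.
# The shape of the call, assuming an hg checkout:)
import subprocess

cmd = 'hg status --added --modified --no-status'
with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as st:
    changed = [line.decode().rstrip() for line in st.stdout]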
- fixed = [] - for path in (x for x in file_paths if x.endswith('.py')): - if reindent.check(os.path.join(SRCDIR, path)): - fixed.append(path) + fixed = [path for path in file_paths if path.endswith('.py') and + reindent.check(os.path.join(SRCDIR, path))] return fixed @@ -148,6 +132,21 @@ def reported_news(file_paths): """Check if Misc/NEWS has been changed.""" return 'Misc/NEWS' in file_paths +@status("configure regenerated", modal=True, info=str) +def regenerated_configure(file_paths): + """Check if configure has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'configure' in file_paths else "no" + else: + return "not needed" + +@status("pyconfig.h.in regenerated", modal=True, info=str) +def regenerated_pyconfig_h_in(file_paths): + """Check if pyconfig.h.in has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'pyconfig.h.in' in file_paths else "no" + else: + return "not needed" def main(): file_paths = changed_files() @@ -167,6 +166,10 @@ def main(): credit_given(special_files) # Misc/NEWS changed. reported_news(special_files) + # Regenerated configure, if necessary. + regenerated_configure(file_paths) + # Regenerated pyconfig.h.in, if necessary. + regenerated_pyconfig_h_in(file_paths) # Test suite run and passed. if python_files or c_files: diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py index 048131e..69e8e0d 100755 --- a/Tools/scripts/pysource.py +++ b/Tools/scripts/pysource.py @@ -22,7 +22,7 @@ __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_pytho import os, re -binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]') +binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]') debug = False @@ -42,7 +42,7 @@ def _open(fullpath): return None try: - return open(fullpath, 'rU') + return open(fullpath, "rb") except IOError as err: # Access denied, or a special file - ignore it print_debug("%s: access denied: %s" % (fullpath, err)) return None @@ -55,8 +55,8 @@ def looks_like_python(fullpath): if infile is None: return False - line = infile.readline() - infile.close() + with infile: + line = infile.readline() if binary_re.search(line): # file appears to be binary @@ -65,7 +65,7 @@ def looks_like_python(fullpath): if fullpath.endswith(".py") or fullpath.endswith(".pyw"): return True - elif "python" in line: + elif b"python" in line: # disguised Python script (e.g. CGI) return True @@ -76,8 +76,8 @@ def can_be_compiled(fullpath): if infile is None: return False - code = infile.read() - infile.close() + with infile: + code = infile.read() try: compile(code, fullpath, "exec") diff --git a/Tools/scripts/pyvenv b/Tools/scripts/pyvenv new file mode 100755 index 0000000..978d691 --- /dev/null +++ b/Tools/scripts/pyvenv @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +if __name__ == '__main__': + import sys + rc = 1 + try: + import venv + venv.main() + rc = 0 + except Exception as e: + print('Error: %s' % e, file=sys.stderr) + sys.exit(rc) diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index b18993b..4a916ea 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -8,6 +8,8 @@ -r (--recurse) Recurse. Search for all .py files in subdirectories too. -n (--nobackup) No backup. Does not make a ".bak" file before reindenting. -v (--verbose) Verbose. Print informative msgs; else no output. + (--newline) Newline. Specify the newline character to use (CRLF, LF). + Default is the same as the original file. -h (--help) Help. Print this usage information and exit. 
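A sketch of the decision the new --newline option feeds into (names mirror reindent.py; the tuple case is how the io module reports mixed newlines):

def choose_newline(spec_newline, detected):
    newline = spec_newline if spec_newline else detected
    if isinstance(newline, tuple):
        raise SystemExit('mixed newlines detected; cannot continue without --newline')
    return newline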
Change Python (.py) files to use 4-space indents and no hard tab characters. @@ -50,6 +52,8 @@ verbose = False recurse = False dryrun = False makebackup = True +spec_newline = None +"""A specified newline to be used in the output (set by --newline option)""" def usage(msg=None): @@ -62,13 +66,12 @@ def errprint(*args): sys.stderr.write(" ".join(str(arg) for arg in args)) sys.stderr.write("\n") - def main(): import getopt - global verbose, recurse, dryrun, makebackup + global verbose, recurse, dryrun, makebackup, spec_newline try: opts, args = getopt.getopt(sys.argv[1:], "drnvh", - ["dryrun", "recurse", "nobackup", "verbose", "help"]) + ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) except getopt.error as msg: usage(msg) return @@ -81,6 +84,11 @@ def main(): makebackup = False elif o in ('-v', '--verbose'): verbose = True + elif o in ('--newline',): + if not a.upper() in ('CRLF', 'LF'): + usage() + return + spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] elif o in ('-h', '--help'): usage() return @@ -118,9 +126,9 @@ def check(file): errprint("%s: I/O Error: %s" % (file, str(msg))) return - newline = r.newlines + newline = spec_newline if spec_newline else r.newlines if isinstance(newline, tuple): - errprint("%s: mixed newlines detected; cannot process file" % file) + errprint("%s: mixed newlines detected; cannot continue without --newline" % file) return if r.run(): diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py new file mode 100755 index 0000000..e2a2050 --- /dev/null +++ b/Tools/scripts/run_tests.py @@ -0,0 +1,51 @@ +"""Run Python's test suite in a fast, rigorous way. + +The defaults are meant to be reasonably thorough, while skipping certain +tests that can be time-consuming or resource-intensive (e.g. largefile), +or distracting (e.g. audio and gui). These defaults can be overridden by +simply passing a -u option to this script. + +""" + +import os +import sys +import test.support +try: + import threading +except ImportError: + threading = None + + +def is_multiprocess_flag(arg): + return arg.startswith('-j') or arg.startswith('--multiprocess') + + +def is_resource_use_flag(arg): + return arg.startswith('-u') or arg.startswith('--use') + + +def main(regrtest_args): + args = [sys.executable, + '-W', 'default', # Warnings set to 'default' + '-bb', # Warnings about bytes/bytearray + '-E', # Ignore environment variables + ] + # Allow user-specified interpreter options to override our defaults. 
+ args.extend(test.support.args_from_interpreter_flags()) + args.extend(['-m', 'test', # Run the test suite + '-r', # Randomize test order + '-w', # Re-run failed tests in verbose mode + ]) + if sys.platform == 'win32': + args.append('-n') # Silence alerts under Windows + if threading and not any(is_multiprocess_flag(arg) for arg in regrtest_args): + args.extend(['-j', '0']) # Use all CPU cores + if not any(is_resource_use_flag(arg) for arg in regrtest_args): + args.extend(['-u', 'all,-largefile,-audio,-gui']) + args.extend(regrtest_args) + print(' '.join(args)) + os.execv(sys.executable, args) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/Tools/scripts/texi2html.py b/Tools/scripts/texi2html.py index af2147a..9983528 100755 --- a/Tools/scripts/texi2html.py +++ b/Tools/scripts/texi2html.py @@ -319,7 +319,7 @@ class TexinfoParser: # Start saving text in a buffer instead of writing it to a file def startsaving(self): - if self.savetext != None: + if self.savetext is not None: self.savestack.append(self.savetext) # print '*** Recursively saving text, expect trouble' self.savetext = '' @@ -341,7 +341,7 @@ class TexinfoParser: except: print(args) raise TypeError - if self.savetext != None: + if self.savetext is not None: self.savetext = self.savetext + text elif self.nodefp: self.nodefp.write(text) @@ -350,7 +350,7 @@ class TexinfoParser: # Complete the current node -- write footnotes and close file def endnode(self): - if self.savetext != None: + if self.savetext is not None: print('*** Still saving text at end of node') dummy = self.collectsavings() if self.footnotes: @@ -804,7 +804,7 @@ class TexinfoParser: def close_i(self): self.write('</I>') def open_footnote(self): - # if self.savetext <> None: + # if self.savetext is not None: # print '*** Recursive footnote -- expect weirdness' id = len(self.footnotes) + 1 self.write(self.FN_SOURCE_PATTERN % {'id': repr(id)}) @@ -1442,7 +1442,7 @@ class TexinfoParser: else: # some other character, e.g. '-' args = self.itemarg + ' ' + args - if self.itemnumber != None: + if self.itemnumber is not None: args = self.itemnumber + '. ' + args self.itemnumber = increment(self.itemnumber) if self.stack and self.stack[-1] == 'table': diff --git a/Tools/ssl/make_ssl_data.py b/Tools/ssl/make_ssl_data.py new file mode 100644 index 0000000..10244d1 --- /dev/null +++ b/Tools/ssl/make_ssl_data.py @@ -0,0 +1,68 @@ +#! /usr/bin/env python3 + +""" +This script should be called *manually* when we want to upgrade SSLError +`library` and `reason` mnemonics to a more recent OpenSSL version. + +It takes two arguments: +- the path to the OpenSSL include files' directory + (e.g.
openssl-1.0.1-beta3/include/openssl/) +- the path to the C file to be generated + (probably Modules/_ssl_data.h) +""" + +import datetime +import os +import re +import sys + + +def parse_error_codes(h_file, prefix): + pat = re.compile(r"#define\W+(%s([\w]+))\W+(\d+)\b" % re.escape(prefix)) + codes = [] + with open(h_file, "r", encoding="latin1") as f: + for line in f: + match = pat.search(line) + if match: + code, name, num = match.groups() + num = int(num) + codes.append((code, name, num)) + return codes + +if __name__ == "__main__": + openssl_inc = sys.argv[1] + outfile = sys.argv[2] + use_stdout = outfile == '-' + f = sys.stdout if use_stdout else open(outfile, "w") + error_libraries = ( + # (library code, mnemonic, error prefix, header file) + ('ERR_LIB_PEM', 'PEM', 'PEM_R_', 'pem.h'), + ('ERR_LIB_SSL', 'SSL', 'SSL_R_', 'ssl.h'), + ('ERR_LIB_X509', 'X509', 'X509_R_', 'x509.h'), + ) + def w(l): + f.write(l + "\n") + w("/* File generated by Tools/ssl/make_ssl_data.py */") + w("/* Generated on %s */" % datetime.datetime.now().isoformat()) + w("") + + w("static struct py_ssl_library_code library_codes[] = {") + for libcode, mnemo, _, _ in error_libraries: + w(' {"%s", %s},' % (mnemo, libcode)) + w(' { NULL }') + w('};') + w("") + + w("static struct py_ssl_error_code error_codes[] = {") + for libcode, _, prefix, h_file in error_libraries: + codes = parse_error_codes(os.path.join(openssl_inc, h_file), prefix) + for code, name, num in sorted(codes): + w(' #ifdef %s' % (code)) + w(' {"%s", %s, %s},' % (name, libcode, code)) + w(' #else') + w(' {"%s", %s, %d},' % (name, libcode, num)) + w(' #endif') + w(' { NULL }') + w('};') + if not use_stdout: + f.close() diff --git a/Tools/stringbench/README b/Tools/stringbench/README new file mode 100644 index 0000000..a271f12 --- /dev/null +++ b/Tools/stringbench/README @@ -0,0 +1,68 @@ +stringbench is a set of performance tests comparing byte string +operations with unicode operations. The two string implementations +are loosely based on each other and sometimes the algorithm for one is +faster than the other. + +This test suite was started at the Need For Speed sprint in Reykjavik +to identify which string methods could be sped up quickly and to +identify obvious places for improvement. + +Here is an example of a benchmark + + +@bench('"Andrew".startswith("A")', 'startswith single character', 1000) +def startswith_single(STR): + s1 = STR("Andrew") + s2 = STR("A") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + +The bench decorator takes three parameters. The first is a short +description of how the code works. In most cases this is a Python code +snippet. It is not the code which is actually run, because the real +code is hand-optimized to focus on the method being tested. + +The second parameter is a group title. All benchmarks with the same +group title are listed together. This lets you compare different +implementations of the same algorithm, such as "t in s" +vs. "s.find(t)". + +The last is a count. Each benchmark loops over the algorithm either +100 or 1000 times, depending on the algorithm's performance. The +output time is the time per benchmark call, so the reader needs the +count to know how to scale the performance. + +These parameters become function attributes.
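To make that scaling concrete, here is a minimal sketch (not the real driver in stringbench.py, whose timing loop is described below) of how a harness could turn one decorated function into a report line:

import timeit

def report_line(f, STR):
    'Time one benchmark and format it roughly like the report below.'
    best = min(timeit.Timer(lambda: f(STR)).repeat(repeat=3, number=1))
    return '%.2f  %s (*%d)' % (best * 1000.0, f.comment, f.repeat_count)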
+ + +Here is an example of the output + + +========== count newlines +38.54 41.60 92.7 ...text.with.2000.newlines.count("\n") (*100) +========== early match, single character +1.14 1.18 96.8 ("A"*1000).find("A") (*1000) +0.44 0.41 105.6 "A" in "A"*1000 (*1000) +1.15 1.17 98.1 ("A"*1000).index("A") (*1000) + +The first column is the run time in milliseconds for byte strings. +The second is the run time for unicode strings. The third is a +percentage: byte time / unicode time. Values above 100 mean the +unicode implementation was the faster of the two. + +The last column contains the code snippet and the repeat count for the +internal benchmark loop. + +The times are computed with 'timeit.py', which repeats the test more +and more times until the total time exceeds 0.2 seconds, returning +the best time for a single iteration. + +The final line of the output is the cumulative time for byte and +unicode strings, and the overall performance of unicode relative to +bytes. For example + +4079.83 5432.25 75.1 TOTAL + +This total should not be over-interpreted, however, as it weights +every test evenly. + diff --git a/Tools/stringbench/stringbench.py b/Tools/stringbench/stringbench.py new file mode 100755 index 0000000..a0a21fa --- /dev/null +++ b/Tools/stringbench/stringbench.py @@ -0,0 +1,1482 @@ + +# Various microbenchmarks comparing unicode and byte string performance +# Please keep this file both 2.x and 3.x compatible! + +import timeit +import itertools +import operator +import re +import sys +import datetime +import optparse + +VERSION = '2.0' + +def p(*args): + sys.stdout.write(' '.join(str(s) for s in args) + '\n') + +if sys.version_info >= (3,): + BYTES = bytes_from_str = lambda x: x.encode('ascii') + UNICODE = unicode_from_str = lambda x: x +else: + BYTES = bytes_from_str = lambda x: x + UNICODE = unicode_from_str = lambda x: x.decode('ascii') + +class UnsupportedType(TypeError): + pass + + +p('stringbench v%s' % VERSION) +p(sys.version) +p(datetime.datetime.now()) + +REPEAT = 1 +REPEAT = 3 +#REPEAT = 7 + +if __name__ != "__main__": + raise SystemExit("Must run as main program") + +parser = optparse.OptionParser() +parser.add_option("-R", "--skip-re", dest="skip_re", + action="store_true", + help="skip regular expression tests") +parser.add_option("-8", "--8-bit", dest="bytes_only", + action="store_true", + help="only do 8-bit string benchmarks") +parser.add_option("-u", "--unicode", dest="unicode_only", + action="store_true", + help="only do Unicode string benchmarks") + + +_RANGE_1000 = list(range(1000)) +_RANGE_100 = list(range(100)) +_RANGE_10 = list(range(10)) + +dups = {} +def bench(s, group, repeat_count): + def blah(f): + if f.__name__ in dups: + raise AssertionError("Multiple functions with same name: %r" % + (f.__name__,)) + dups[f.__name__] = 1 + f.comment = s + f.is_bench = True + f.group = group + f.repeat_count = repeat_count + return f + return blah + +def uses_re(f): + f.uses_re = True + return f # return the function so stacked decorators keep it + +####### 'in' comparisons + +@bench('"A" in "A"*1000', "early match, single character", 1000) +def in_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + for x in _RANGE_1000: + s2 in s1 + +@bench('"B" in "A"*1000', "no match, single character", 1000) +def in_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + for x in _RANGE_1000: + s2 in s1 + + +@bench('"AB" in "AB"*1000', "early match, two characters", 1000) +def in_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + for x in _RANGE_1000: + s2 in s1 + +@bench('"BC" in "AB"*1000', "no 
match, two characters", 1000) +def in_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + for x in _RANGE_1000: + s2 in s1 + +@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000) +def in_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + for x in _RANGE_1000: + s2 in s1 + +@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")', + "late match, 100 characters", 100) +def in_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*300 + m+e + s2 = m+e + for x in _RANGE_100: + s2 in s1 + +# Try with regex +@uses_re +@bench('s="ABC"*33; re.compile(s+"D").search((s+"D")*300+s+"E")', + "late match, 100 characters", 100) +def re_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*300 + m+e + s2 = m+e + pat = re.compile(s2) + search = pat.search + for x in _RANGE_100: + search(s1) + + +#### same tests as 'in' but use 'find' + +@bench('("A"*1000).find("A")', "early match, single character", 1000) +def find_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("A"*1000).find("B")', "no match, single character", 1000) +def find_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + + +@bench('("AB"*1000).find("AB")', "early match, two characters", 1000) +def find_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*1000).find("BC")', "no match, two characters", 1000) +def find_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*1000).find("CA")', "no match, two characters", 1000) +def find_test_no_match_two_character_bis(STR): + s1 = STR("AB" * 1000) + s2 = STR("CA") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000) +def find_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000) +def find_test_slow_match_two_characters_bis(STR): + s1 = STR("AB" * 300+"CA") + s2 = STR("CA") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")', + "late match, 100 characters", 100) +def find_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_find = s1.find + for x in _RANGE_100: + s1_find(s2) + +@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)', + "late match, 100 characters", 100) +def find_test_slow_match_100_characters_bis(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + e+m + s2 = e+m + s1_find = s1.find + for x in _RANGE_100: + s1_find(s2) + + +#### Same tests for 'rfind' + +@bench('("A"*1000).rfind("A")', "early match, single character", 1000) +def rfind_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("A"*1000).rfind("B")', "no match, single character", 1000) +def rfind_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rfind = s1.rfind + for x in 
_RANGE_1000: + s1_rfind(s2) + + +@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000) +def rfind_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000) +def rfind_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000) +def rfind_test_no_match_two_character_bis(STR): + s1 = STR("AB" * 1000) + s2 = STR("CA") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters_bis(STR): + s1 = STR("BC" + "AB" * 300) + s2 = STR("BC") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e+m + (d+m)*500 + s2 = e+m + s1_rfind = s1.rfind + for x in _RANGE_100: + s1_rfind(s2) + +@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters_bis(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = m+e + (d+m)*500 + s2 = m+e + s1_rfind = s1.rfind + for x in _RANGE_100: + s1_rfind(s2) + + +#### Now with index. +# Skip the ones which fail because that would include exception overhead. 
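# (Why failing lookups are excluded here: a miss with .find() cheaply
# returns -1, while a miss with .index() raises ValueError, and building
# and unwinding that exception would dominate what the loop is trying
# to measure. In isolation:)
s1 = "A" * 1000
assert s1.find("B") == -1     # cheap miss, fine to benchmark
try:
    s1.index("B")             # raising miss, excluded from the suite
except ValueError:
    pass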
+ +@bench('("A"*1000).index("A")', "early match, single character", 1000) +def index_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_index = s1.index + for x in _RANGE_1000: + s1_index(s2) + +@bench('("AB"*1000).index("AB")', "early match, two characters", 1000) +def index_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_index = s1.index + for x in _RANGE_1000: + s1_index(s2) + +@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000) +def index_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_index = s1.index + for x in _RANGE_1000: + s1_index(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")', + "late match, 100 characters", 100) +def index_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_index = s1.index + for x in _RANGE_100: + s1_index(s2) + + +#### Same for rindex + +@bench('("A"*1000).rindex("A")', "early match, single character", 1000) +def rindex_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000) +def rindex_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000) +def rindex_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)', + "late match, 100 characters", 100) +def rindex_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e + m + (d+m)*500 + s2 = e + m + s1_rindex = s1.rindex + for x in _RANGE_100: + s1_rindex(s2) + + +#### Same for partition + +@bench('("A"*1000).partition("A")', "early match, single character", 1000) +def partition_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("A"*1000).partition("B")', "no match, single character", 1000) +def partition_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + + +@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000) +def partition_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000) +def partition_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000) +def partition_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")', + "late match, 100 characters", 100) +def partition_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_partition = s1.partition + for x in _RANGE_100: + s1_partition(s2) + + +#### Same 
for rpartition + +@bench('("A"*1000).rpartition("A")', "early match, single character", 1000) +def rpartition_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("A"*1000).rpartition("B")', "no match, single character", 1000) +def rpartition_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + + +@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000) +def rpartition_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000) +def rpartition_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000) +def rpartition_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)', + "late match, 100 characters", 100) +def rpartition_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e + m + (d+m)*500 + s2 = e + m + s1_rpartition = s1.rpartition + for x in _RANGE_100: + s1_rpartition(s2) + + +#### Same for split(s, 1) + +@bench('("A"*1000).split("A", 1)', "early match, single character", 1000) +def split_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("A"*1000).split("B", 1)', "no match, single character", 1000) +def split_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + + +@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000) +def split_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000) +def split_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000) +def split_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)', + "late match, 100 characters", 100) +def split_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_split = s1.split + for x in _RANGE_100: + s1_split(s2, 1) + + +#### Same for rsplit(s, 1) + +@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000) +def rsplit_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000) +def rsplit_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + + 
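# (A pattern worth noting, used throughout this file: the bound method is
# hoisted out of the loop so the timing isolates the string operation
# itself rather than a repeated attribute lookup. Compare:)
s1 = "AB" * 1000
s2 = "BC"
s1_rsplit = s1.rsplit        # measured form: one lookup, many calls
for x in range(1000):
    s1_rsplit(s2, 1)
for x in range(1000):
    s1.rsplit(s2, 1)         # naive form re-does the lookup every pass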
+@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000) +def rsplit_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000) +def rsplit_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000) +def rsplit_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)', + "late match, 100 characters", 100) +def rsplit_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e + m + (d+m)*500 + s2 = e + m + s1_rsplit = s1.rsplit + for x in _RANGE_100: + s1_rsplit(s2, 1) + + +#### Benchmark the operator-based methods + +@bench('"A"*10', "repeat 1 character 10 times", 1000) +def repeat_single_10_times(STR): + s = STR("A") + for x in _RANGE_1000: + s * 10 + +@bench('"A"*1000', "repeat 1 character 1000 times", 1000) +def repeat_single_1000_times(STR): + s = STR("A") + for x in _RANGE_1000: + s * 1000 + +@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000) +def repeat_5_10_times(STR): + s = STR("ABCDE") + for x in _RANGE_1000: + s * 10 + +@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000) +def repeat_5_1000_times(STR): + s = STR("ABCDE") + for x in _RANGE_1000: + s * 1000 + +# + for concat + +@bench('"Andrew"+"Dalke"', "concat two strings", 1000) +def concat_two_strings(STR): + s1 = STR("Andrew") + s2 = STR("Dalke") + for x in _RANGE_1000: + s1+s2 + +@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15", + 1000) +def concat_many_strings(STR): + s1=STR('TIXSGYNREDCVBHJ') + s2=STR('PUMTLXBZVDO') + s3=STR('FVZNJ') + s4=STR('OGDXUW') + s5=STR('WEIMRNCOYVGHKB') + s6=STR('FCQTNMXPUZH') + s7=STR('TICZJYRLBNVUEAK') + s8=STR('REYB') + s9=STR('PWUOQ') + s10=STR('EQHCMKBS') + s11=STR('AEVDFOH') + s12=STR('IFHVD') + s13=STR('JGTCNLXWOHQ') + s14=STR('ITSKEPYLROZAWXF') + s15=STR('THEK') + s16=STR('GHPZFBUYCKMNJIT') + s17=STR('JMUZ') + s18=STR('WLZQMTB') + s19=STR('KPADCBW') + s20=STR('TNJHZQAGBU') + for x in _RANGE_1000: + (s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+ + s11+s12+s13+s14+s15+s16+s17+s18+s19+s20) + + +#### Benchmark join + +def get_bytes_yielding_seq(STR, arg): + if STR is BYTES and sys.version_info >= (3,): + raise UnsupportedType + return STR(arg) + +@bench('"A".join("")', + "join empty string, with 1 character sep", 100) +def join_empty_single(STR): + sep = STR("A") + s2 = get_bytes_yielding_seq(STR, "") + sep_join = sep.join + for x in _RANGE_100: + sep_join(s2) + +@bench('"ABCDE".join("")', + "join empty string, with 5 character sep", 100) +def join_empty_5(STR): + sep = STR("ABCDE") + s2 = get_bytes_yielding_seq(STR, "") + sep_join = sep.join + for x in _RANGE_100: + sep_join(s2) + +@bench('"A".join("ABC..Z")', + "join string with 26 characters, with 1 character sep", 1000) +def join_alphabet_single(STR): + sep = STR("A") + s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ") + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"ABCDE".join("ABC..Z")', + "join string with 26 characters, with 5 character sep", 1000) +def join_alphabet_5(STR): + sep = STR("ABCDE") + s2 = get_bytes_yielding_seq(STR, 
"ABCDEFGHIJKLMnOPQRSTUVWXYZ") + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"A".join(list("ABC..Z"))', + "join list of 26 characters, with 1 character sep", 1000) +def join_alphabet_list_single(STR): + sep = STR("A") + s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"] + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"ABCDE".join(list("ABC..Z"))', + "join list of 26 characters, with 5 character sep", 1000) +def join_alphabet_list_five(STR): + sep = STR("ABCDE") + s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"] + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"A".join(["Bob"]*100))', + "join list of 100 words, with 1 character sep", 1000) +def join_100_words_single(STR): + sep = STR("A") + s2 = [STR("Bob")]*100 + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"ABCDE".join(["Bob"]*100))', + "join list of 100 words, with 5 character sep", 1000) +def join_100_words_5(STR): + sep = STR("ABCDE") + s2 = [STR("Bob")]*100 + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +#### split tests + +@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000) +def whitespace_split(STR): + s = STR("Here are some words. "*2) + s_split = s.split + for x in _RANGE_1000: + s_split() + +@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000) +def whitespace_rsplit(STR): + s = STR("Here are some words. "*2) + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit() + +@bench('("Here are some words. "*2).split(None, 1)', + "split 1 whitespace", 1000) +def whitespace_split_1(STR): + s = STR("Here are some words. "*2) + s_split = s.split + N = None + for x in _RANGE_1000: + s_split(N, 1) + +@bench('("Here are some words. "*2).rsplit(None, 1)', + "split 1 whitespace", 1000) +def whitespace_rsplit_1(STR): + s = STR("Here are some words. "*2) + s_rsplit = s.rsplit + N = None + for x in _RANGE_1000: + s_rsplit(N, 1) + +@bench('("Here are some words. "*2).partition(" ")', + "split 1 whitespace", 1000) +def whitespace_partition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_partition = s.partition + for x in _RANGE_1000: + s_partition(sep) + +@bench('("Here are some words. "*2).rpartition(" ")', + "split 1 whitespace", 1000) +def whitespace_rpartition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_rpartition = s.rpartition + for x in _RANGE_1000: + s_rpartition(sep) + +human_text = """\ +Python is a dynamic object-oriented programming language that can be +used for many kinds of software development. It offers strong support +for integration with other languages and tools, comes with extensive +standard libraries, and can be learned in a few days. Many Python +programmers report substantial productivity gains and feel the language +encourages the development of higher quality, more maintainable code. + +Python runs on Windows, Linux/Unix, Mac OS X, OS/2, Amiga, Palm +Handhelds, and Nokia mobile phones. Python has also been ported to the +Java and .NET virtual machines. + +Python is distributed under an OSI-approved open source license that +makes it free to use, even for commercial products. 
+"""*25 +human_text_bytes = bytes_from_str(human_text) +human_text_unicode = unicode_from_str(human_text) +def _get_human_text(STR): + if STR is UNICODE: + return human_text_unicode + if STR is BYTES: + return human_text_bytes + raise AssertionError + +@bench('human_text.split()', "split whitespace (huge)", 10) +def whitespace_split_huge(STR): + s = _get_human_text(STR) + s_split = s.split + for x in _RANGE_10: + s_split() + +@bench('human_text.rsplit()', "split whitespace (huge)", 10) +def whitespace_rsplit_huge(STR): + s = _get_human_text(STR) + s_rsplit = s.rsplit + for x in _RANGE_10: + s_rsplit() + + + +@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000) +def newlines_split(STR): + s = STR("this\nis\na\ntest\n") + s_split = s.split + nl = STR("\n") + for x in _RANGE_1000: + s_split(nl) + + +@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000) +def newlines_rsplit(STR): + s = STR("this\nis\na\ntest\n") + s_rsplit = s.rsplit + nl = STR("\n") + for x in _RANGE_1000: + s_rsplit(nl) + +@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000) +def newlines_splitlines(STR): + s = STR("this\nis\na\ntest\n") + s_splitlines = s.splitlines + for x in _RANGE_1000: + s_splitlines() + +## split text with 2000 newlines + +def _make_2000_lines(): + import random + r = random.Random(100) + chars = list(map(chr, range(32, 128))) + i = 0 + while i < len(chars): + chars[i] = " " + i += r.randrange(9) + s = "".join(chars) + s = s*4 + words = [] + for i in range(2000): + start = r.randrange(96) + n = r.randint(5, 65) + words.append(s[start:start+n]) + return "\n".join(words)+"\n" + +_text_with_2000_lines = _make_2000_lines() +_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines) +_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines) +def _get_2000_lines(STR): + if STR is UNICODE: + return _text_with_2000_lines_unicode + if STR is BYTES: + return _text_with_2000_lines_bytes + raise AssertionError + + +@bench('"...text...".split("\\n")', "split 2000 newlines", 10) +def newlines_split_2000(STR): + s = _get_2000_lines(STR) + s_split = s.split + nl = STR("\n") + for x in _RANGE_10: + s_split(nl) + +@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10) +def newlines_rsplit_2000(STR): + s = _get_2000_lines(STR) + s_rsplit = s.rsplit + nl = STR("\n") + for x in _RANGE_10: + s_rsplit(nl) + +@bench('"...text...".splitlines()', "split 2000 newlines", 10) +def newlines_splitlines_2000(STR): + s = _get_2000_lines(STR) + s_splitlines = s.splitlines + for x in _RANGE_10: + s_splitlines() + + +## split text on "--" characters +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".split("--")', + "split on multicharacter separator (small)", 1000) +def split_multichar_sep_small(STR): + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_split = s.split + pat = STR("--") + for x in _RANGE_1000: + s_split(pat) +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")', + "split on multicharacter separator (small)", 1000) +def rsplit_multichar_sep_small(STR): + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_rsplit = s.rsplit + pat = STR("--") + for x in _RANGE_1000: + s_rsplit(pat) + +## split dna text on "ACTAT" characters +@bench('dna.split("ACTAT")', + "split on multicharacter separator (dna)", 10) +def split_multichar_sep_dna(STR): + s = _get_dna(STR) + s_split = s.split + pat = STR("ACTAT") + for x in _RANGE_10: + s_split(pat) + 
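The big inputs used above (human_text, _text_with_2000_lines) are converted to one bytes and one str object up front, so the timed loops never pay for the conversion. The converters bytes_from_str() and unicode_from_str() are defined near the top of stringbench.py and are not part of this hunk; on Python 3 they plausibly reduce to the following sketch (assuming latin-1 can represent the inputs, which are all ASCII here):

def bytes_from_str(s):
    # Sketch only: the real helper lives earlier in the file and also
    # covers Python 2, where str is already the 8-bit type.
    return s.encode("latin1")

def unicode_from_str(s):
    return s    # on Python 3, str is already the text type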
+@bench('dna.rsplit("ACTAT")', + "split on multicharacter separator (dna)", 10) +def rsplit_multichar_sep_dna(STR): + s = _get_dna(STR) + s_rsplit = s.rsplit + pat = STR("ACTAT") + for x in _RANGE_10: + s_rsplit(pat) + + + +## split with limits + +GFF3_example = "\t".join([ + "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".", + "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"]) + +@bench('GFF3_example.split("\\t")', "tab split", 1000) +def tab_split_no_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_split = s.split + for x in _RANGE_1000: + s_split(sep) + +@bench('GFF3_example.split("\\t", 8)', "tab split", 1000) +def tab_split_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_split = s.split + for x in _RANGE_1000: + s_split(sep, 8) + +@bench('GFF3_example.rsplit("\\t")', "tab split", 1000) +def tab_rsplit_no_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit(sep) + +@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000) +def tab_rsplit_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit(sep, 8) + +#### Count characters + +@bench('...text.with.2000.newlines.count("\\n")', + "count newlines", 10) +def count_newlines(STR): + s = _get_2000_lines(STR) + s_count = s.count + nl = STR("\n") + for x in _RANGE_10: + s_count(nl) + +# Orchid sequences concatenated, from Biopython +_dna = """ +CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT +AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG +AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT +TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC +AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG +TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT +CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT +TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT +GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC +TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG +GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA +ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC +CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA +ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA +ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA +TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG +CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG +GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA +ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG +ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC +ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA +GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA +TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG +TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT +TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG +GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG +GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT +AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC 
+GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG +TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT +CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA +TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC +TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC +AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT +GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT +GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA +CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG +GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA +TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG +ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT +GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA +AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC +AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA +ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC +GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC +GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC +AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA +GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG +ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC +GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC +""" +_dna = "".join(_dna.splitlines()) +_dna = _dna * 25 +_dna_bytes = bytes_from_str(_dna) +_dna_unicode = unicode_from_str(_dna) + +def _get_dna(STR): + if STR is UNICODE: + return _dna_unicode + if STR is BYTES: + return _dna_bytes + raise AssertionError + +@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10) +def count_aact(STR): + seq = _get_dna(STR) + seq_count = seq.count + needle = STR("AACT") + for x in _RANGE_10: + seq_count(needle) + +##### startswith and endswith + +@bench('"Andrew".startswith("A")', 'startswith single character', 1000) +def startswith_single(STR): + s1 = STR("Andrew") + s2 = STR("A") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + +@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters', + 1000) +def startswith_multiple(STR): + s1 = STR("Andrew") + s2 = STR("Andrew") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + +@bench('"Andrew".startswith("Anders")', + 'startswith multiple characters - not!', 1000) +def startswith_multiple_not(STR): + s1 = STR("Andrew") + s2 = STR("Anders") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + + +# endswith + +@bench('"Andrew".endswith("w")', 'endswith single character', 1000) +def endswith_single(STR): + s1 = STR("Andrew") + s2 = STR("w") + s1_endswith = s1.endswith + for x in _RANGE_1000: + s1_endswith(s2) + +@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000) +def endswith_multiple(STR): + s1 = STR("Andrew") + s2 = STR("Andrew") + s1_endswith = s1.endswith + for x in _RANGE_1000: + s1_endswith(s2) + +@bench('"Andrew".endswith("Anders")', + 'endswith multiple characters - not!', 1000) +def endswith_multiple_not(STR): + s1 = STR("Andrew") + s2 = STR("Anders") + s1_endswith = s1.endswith + for x in _RANGE_1000: + s1_endswith(s2) + +#### Strip + +@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000) +def 
terminal_newline_strip_right(STR):
+    s = STR("Hello!\n")
+    s_strip = s.strip
+    for x in _RANGE_1000:
+        s_strip()
+
+@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
+def terminal_newline_rstrip(STR):
+    s = STR("Hello!\n")
+    s_rstrip = s.rstrip
+    for x in _RANGE_1000:
+        s_rstrip()
+
+@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
+def terminal_newline_strip_left(STR):
+    s = STR("\nHello!")
+    s_strip = s.strip
+    for x in _RANGE_1000:
+        s_strip()
+
+@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
+def terminal_newline_strip_both(STR):
+    s = STR("\nHello!\n")
+    s_strip = s.strip
+    for x in _RANGE_1000:
+        s_strip()
+
+@bench('"\\nHello!".lstrip()', 'strip terminal newline', 1000)
+def terminal_newline_lstrip(STR):
+    s = STR("\nHello!")
+    s_lstrip = s.lstrip
+    for x in _RANGE_1000:
+        s_lstrip()
+
+@bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s',
+       'strip terminal newline', 1000)
+def terminal_newline_if_else(STR):
+    s = STR("Hello!\n")
+    NL = STR("\n")
+    for x in _RANGE_1000:
+        s[:-1] if (s[-1] == NL) else s
+
+
+# Strip multiple spaces or tabs
+
+@bench('"Hello!\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
+def terminal_space_strip(STR):
+    s = STR("Hello!\t \t")
+    s_strip = s.strip
+    for x in _RANGE_1000:
+        s_strip()
+
+@bench('"Hello!\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
+def terminal_space_rstrip(STR):
+    s = STR("Hello!\t \t")
+    s_rstrip = s.rstrip
+    for x in _RANGE_1000:
+        s_rstrip()
+
+@bench('"\\t \\tHello!".lstrip()', 'strip terminal spaces and tabs', 1000)
+def terminal_space_lstrip(STR):
+    s = STR("\t \tHello!")
+    s_lstrip = s.lstrip
+    for x in _RANGE_1000:
+        s_lstrip()
+
+
+#### replace
+@bench('"This is a test!".replace(" ", "\\t")', 'replace single character',
+       1000)
+def replace_single_character(STR):
+    s = STR("This is a test!")
+    from_str = STR(" ")
+    to_str = STR("\t")
+    s_replace = s.replace
+    for x in _RANGE_1000:
+        s_replace(from_str, to_str)
+
+@uses_re
+@bench('re.sub(" ", "\\t", "This is a test!")', 'replace single character',
+       1000)
+def replace_single_character_re(STR):
+    s = STR("This is a test!")
+    pat = re.compile(STR(" "))
+    to_str = STR("\t")
+    pat_sub = pat.sub
+    for x in _RANGE_1000:
+        pat_sub(to_str, s)
+
+@bench('"...text.with.2000.lines...".replace("\\n", " ")',
+       'replace single character, big string', 10)
+def replace_single_character_big(STR):
+    s = _get_2000_lines(STR)
+    from_str = STR("\n")
+    to_str = STR(" ")
+    s_replace = s.replace
+    for x in _RANGE_10:
+        s_replace(from_str, to_str)
+
+@uses_re
+@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
+       'replace single character, big string', 10)
+def replace_single_character_big_re(STR):
+    s = _get_2000_lines(STR)
+    pat = re.compile(STR("\n"))
+    to_str = STR(" ")
+    pat_sub = pat.sub
+    for x in _RANGE_10:
+        pat_sub(to_str, s)
+
+
+@bench('dna.replace("ATC", "ATT")',
+       'replace multiple characters, dna', 10)
+def replace_multiple_characters_dna(STR):
+    seq = _get_dna(STR)
+    from_str = STR("ATC")
+    to_str = STR("ATT")
+    seq_replace = seq.replace
+    for x in _RANGE_10:
+        seq_replace(from_str, to_str)
+
+# This increases the character count
+@bench('"...text.with.2000.newlines...".replace("\\n", "\\r\\n")',
+       'replace and expand multiple characters, big string', 10)
+def replace_multiple_character_big(STR):
+    s = _get_2000_lines(STR)
+    from_str = STR("\n")
+    to_str = STR("\r\n")
+    s_replace = s.replace
+    for x in _RANGE_10:
+        s_replace(from_str, to_str)
+
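The @uses_re variants above time re.sub() against plain str.replace() on identical inputs, and main() skips them when the regex-skipping option (options.skip_re) is set. The comparison is meaningful because, for a literal needle with no regex metacharacters, both produce the same string:

import re

s = "This is a test!"
# A fixed, non-pattern needle: str.replace and re.sub agree.
assert s.replace(" ", "\t") == re.sub(" ", "\t", s)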
+# This decreases the character count
+@bench('"When shall we three meet again?".replace("ee", "")',
+       'replace/remove multiple characters', 1000)
+def replace_multiple_character_remove(STR):
+    s = STR("When shall we three meet again?")
+    from_str = STR("ee")
+    to_str = STR("")
+    s_replace = s.replace
+    for x in _RANGE_1000:
+        s_replace(from_str, to_str)
+
+
+big_s = "A" + ("Z"*128*1024)
+big_s_bytes = bytes_from_str(big_s)
+big_s_unicode = unicode_from_str(big_s)
+def _get_big_s(STR):
+    if STR is UNICODE: return big_s_unicode
+    if STR is BYTES: return big_s_bytes
+    raise AssertionError
+
+# The older replace implementation counted all matches in
+# the string even when it only needed to make one replacement.
+@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
+       'quick replace single character match', 10)
+def quick_replace_single_match(STR):
+    s = _get_big_s(STR)
+    from_str = STR("A")
+    to_str = STR("BB")
+    s_replace = s.replace
+    for x in _RANGE_10:
+        s_replace(from_str, to_str, 1)
+
+@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
+       'quick replace multiple character match', 10)
+def quick_replace_multiple_match(STR):
+    s = _get_big_s(STR)
+    from_str = STR("AZZ")
+    to_str = STR("BBZZ")
+    s_replace = s.replace
+    for x in _RANGE_10:
+        s_replace(from_str, to_str, 1)
+
+
+####
+
+# CCP does a lot of this, for internationalisation of in-game messages.
+_format = "The %(thing)s is %(place)s the %(location)s."
+_format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
+_format_bytes = bytes_from_str(_format)
+_format_unicode = unicode_from_str(_format)
+_format_dict_bytes = dict((bytes_from_str(k), bytes_from_str(v)) for (k,v) in _format_dict.items())
+_format_dict_unicode = dict((unicode_from_str(k), unicode_from_str(v)) for (k,v) in _format_dict.items())
+
+def _get_format(STR):
+    if STR is UNICODE:
+        return _format_unicode
+    if STR is BYTES:
+        if sys.version_info >= (3,):
+            raise UnsupportedType
+        return _format_bytes
+    raise AssertionError
+
+def _get_format_dict(STR):
+    if STR is UNICODE:
+        return _format_dict_unicode
+    if STR is BYTES:
+        if sys.version_info >= (3,):
+            raise UnsupportedType
+        return _format_dict_bytes
+    raise AssertionError
+
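The benchmark that follows times classic %-formatting with a mapping: each %(key)s placeholder is looked up by key in the dict on the right-hand side of the % operator. With the template defined above:

msg = "The %(thing)s is %(place)s the %(location)s."
d = {"thing": "THING", "place": "PLACE", "location": "LOCATION"}
print(msg % d)    # -> The THING is PLACE the LOCATION.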
+# Formatting.
+@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
+       'formatting a string type with a dict', 1000)
+def format_with_dict(STR):
+    s = _get_format(STR)
+    d = _get_format_dict(STR)
+    for x in _RANGE_1000:
+        s % d
+
+
+#### Upper- and lower-case conversion
+
+@bench('("Where in the world is Carmen San Diego?"*10).lower()',
+       "case conversion -- rare", 1000)
+def lower_conversion_rare(STR):
+    s = STR("Where in the world is Carmen San Diego?"*10)
+    s_lower = s.lower
+    for x in _RANGE_1000:
+        s_lower()
+
+@bench('("WHERE IN THE WORLD IS CARMEN SAN DIEGO?"*10).lower()',
+       "case conversion -- dense", 1000)
+def lower_conversion_dense(STR):
+    s = STR("WHERE IN THE WORLD IS CARMEN SAN DIEGO?"*10)
+    s_lower = s.lower
+    for x in _RANGE_1000:
+        s_lower()
+
+
+@bench('("wHERE IN THE WORLD IS cARMEN sAN dIEGO?"*10).upper()',
+       "case conversion -- rare", 1000)
+def upper_conversion_rare(STR):
+    s = STR("wHERE IN THE WORLD IS cARMEN sAN dIEGO?"*10)
+    s_upper = s.upper
+    for x in _RANGE_1000:
+        s_upper()
+
+@bench('("where in the world is carmen san diego?"*10).upper()',
+       "case conversion -- dense", 1000)
+def upper_conversion_dense(STR):
+    s = STR("where in the world is carmen san diego?"*10)
+    s_upper = s.upper
+    for x in _RANGE_1000:
+        s_upper()
+
+
+# end of benchmarks
+
+#################
+
+class BenchTimer(timeit.Timer):
+    def best(self, repeat=1):
+        for i in range(1, 10):
+            number = 10**i
+            x = self.timeit(number)
+            if x > 0.02:
+                break
+        times = [x]
+        for i in range(1, repeat):
+            times.append(self.timeit(number))
+        return min(times) / number
+
+def main():
+    (options, test_names) = parser.parse_args()
+    if options.bytes_only and options.unicode_only:
+        raise SystemExit("Only one of --8-bit and --unicode is allowed")
+
+    bench_functions = []
+    for (k,v) in globals().items():
+        if hasattr(v, "is_bench"):
+            if test_names:
+                for name in test_names:
+                    if name in v.group:
+                        break
+                else:
+                    # Not selected, ignore
+                    continue
+            if options.skip_re and hasattr(v, "uses_re"):
+                continue
+
+            bench_functions.append( (v.group, k, v) )
+    bench_functions.sort()
+
+    p("bytes\tunicode")
+    p("(in ms)\t(in ms)\t%\tcomment")
+
+    bytes_total = uni_total = 0.0
+
+    for title, group in itertools.groupby(bench_functions,
+                                          operator.itemgetter(0)):
+        # Flush buffer before each group
+        sys.stdout.flush()
+        p("="*10, title)
+        for (_, k, v) in group:
+            if hasattr(v, "is_bench"):
+                bytes_time = 0.0
+                bytes_time_s = " - "
+                if not options.unicode_only:
+                    try:
+                        bytes_time = BenchTimer("__main__.%s(__main__.BYTES)" % (k,),
+                                                "import __main__").best(REPEAT)
+                        bytes_time_s = "%.2f" % (1000 * bytes_time)
+                        bytes_total += bytes_time
+                    except UnsupportedType:
+                        bytes_time_s = "N/A"
+                uni_time = 0.0
+                uni_time_s = " - "
+                if not options.bytes_only:
+                    try:
+                        uni_time = BenchTimer("__main__.%s(__main__.UNICODE)" % (k,),
+                                              "import __main__").best(REPEAT)
+                        uni_time_s = "%.2f" % (1000 * uni_time)
+                        uni_total += uni_time
+                    except UnsupportedType:
+                        uni_time_s = "N/A"
+                try:
+                    average = bytes_time/uni_time
+                except (TypeError, ZeroDivisionError):
+                    average = 0.0
+                p("%s\t%s\t%.1f\t%s (*%d)" % (
+                    bytes_time_s, uni_time_s, 100.*average,
+                    v.comment, v.repeat_count))
+
+    if bytes_total == uni_total == 0.0:
+        p("That was zippy!")
+    else:
+        try:
+            ratio = bytes_total/uni_total
+        except ZeroDivisionError:
+            ratio = 0.0
+        p("%.2f\t%.2f\t%.1f\t%s" % (
+            1000*bytes_total, 1000*uni_total, 100.*ratio,
+            "TOTAL"))
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py
index 01303db..6525ddf 100644 --- a/Tools/unicode/comparecodecs.py +++ b/Tools/unicode/comparecodecs.py @@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2): print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)) mismatch = 0 # Check encoding - for i in range(sys.maxunicode): + for i in range(sys.maxunicode+1): u = chr(i) try: c1 = u.encode(encoding1) diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py index 7e7d6d0..f5a1af3 100644 --- a/Tools/unicode/gencodec.py +++ b/Tools/unicode/gencodec.py @@ -102,7 +102,7 @@ def readmap(filename): comment = '' else: comment = comment[1:].strip() - if enc < 256: + if not isinstance(enc, tuple) and enc < 256: if enc in unmapped: unmapped.remove(enc) if enc == uni: @@ -202,11 +202,10 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): # Analyze map and create table dict mappings = sorted(map.items()) table = {} - maxkey = 0 + maxkey = 255 if 'IDENTITY' in map: for key in range(256): table[key] = (key, '') - maxkey = 255 del map['IDENTITY'] for mapkey, mapvalue in mappings: mapcomment = '' @@ -224,6 +223,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): return None # Create table code + maxchar = 0 for key in range(maxkey + 1): if key not in table: mapvalue = MISSING_CODE @@ -238,6 +238,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): return None else: mapchar = chr(mapvalue) + maxchar = max(maxchar, ord(mapchar)) if mapcomment and comments: append(' %a \t# %s -> %s' % (mapchar, hexrepr(key, key_precision), @@ -245,6 +246,8 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): else: append(' %a' % mapchar) + if maxchar < 256: + append(' %a \t## Widen to UCS2 for optimization' % UNI_UNDEFINED) append(')') return l diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index d503190..d83cf63 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -21,17 +21,23 @@ # 2004-05-29 perky add east asian width information # 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta # 2008-06-11 gb add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch +# 2011-10-21 ezio add support for name aliases and named sequences +# 2012-01 benjamin add full case mappings # # written by Fredrik Lundh (fredrik@pythonware.com) # -import sys, os, zipfile +import os +import sys +import zipfile + +from textwrap import dedent SCRIPT = sys.argv[0] VERSION = "3.2" # The Unicode Database -UNIDATA_VERSION = "6.0.0" +UNIDATA_VERSION = "6.1.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" EASTASIAN_WIDTH = "EastAsianWidth%s.txt" @@ -39,6 +45,19 @@ UNIHAN = "Unihan%s.zip" DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt" DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt" LINE_BREAK = "LineBreak%s.txt" +NAME_ALIASES = "NameAliases%s.txt" +NAMED_SEQUENCES = "NamedSequences%s.txt" +SPECIAL_CASING = "SpecialCasing%s.txt" +CASE_FOLDING = "CaseFolding%s.txt" + +# Private Use Areas -- in planes 1, 15, 16 +PUA_1 = range(0xE000, 0xF900) +PUA_15 = range(0xF0000, 0xFFFFE) +PUA_16 = range(0x100000, 0x10FFFE) + +# we use this ranges of PUA_15 to store name aliases and named sequences +NAME_ALIASES_START = 0xF0000 +NAMED_SEQUENCES_START = 0xF0200 old_versions = ["3.2.0"] @@ -67,13 +86,15 @@ UPPER_MASK = 0x80 XID_START_MASK = 0x100 XID_CONTINUE_MASK = 0x200 PRINTABLE_MASK = 0x400 -NODELTA_MASK = 0x800 -NUMERIC_MASK = 0x1000 +NUMERIC_MASK = 0x800 
+CASE_IGNORABLE_MASK = 0x1000 +CASED_MASK = 0x2000 +EXTENDED_CASE_MASK = 0x4000 # these ranges need to match unicodedata.c:is_unified_ideograph cjk_ranges = [ ('3400', '4DB5'), - ('4E00', '9FCB'), + ('4E00', '9FCC'), ('20000', '2A6D6'), ('2A700', '2B734'), ('2B740', '2B81D') @@ -367,6 +388,7 @@ def makeunicodetype(unicode, trace): numeric = {} spaces = [] linebreaks = [] + extra_casing = [] for char in unicode.chars: record = unicode.table[char] @@ -379,7 +401,7 @@ def makeunicodetype(unicode, trace): delta = True if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]: flags |= ALPHA_MASK - if category == "Ll": + if "Lowercase" in properties: flags |= LOWER_MASK if 'Line_Break' in properties or bidirectional == "B": flags |= LINEBREAK_MASK @@ -389,7 +411,7 @@ def makeunicodetype(unicode, trace): spaces.append(char) if category == "Lt": flags |= TITLE_MASK - if category == "Lu": + if "Uppercase" in properties: flags |= UPPER_MASK if char == ord(" ") or category[0] not in ("C", "Z"): flags |= PRINTABLE_MASK @@ -397,7 +419,12 @@ def makeunicodetype(unicode, trace): flags |= XID_START_MASK if "XID_Continue" in properties: flags |= XID_CONTINUE_MASK - # use delta predictor for upper/lower/title if it fits + if "Cased" in properties: + flags |= CASED_MASK + if "Case_Ignorable" in properties: + flags |= CASE_IGNORABLE_MASK + sc = unicode.special_casing.get(char) + cf = unicode.case_folding.get(char, [char]) if record[12]: upper = int(record[12], 16) else: @@ -409,23 +436,39 @@ def makeunicodetype(unicode, trace): if record[14]: title = int(record[14], 16) else: - # UCD.html says that a missing title char means that - # it defaults to the uppercase character, not to the - # character itself. Apparently, in the current UCD (5.x) - # this feature is never used title = upper - upper_d = upper - char - lower_d = lower - char - title_d = title - char - if -32768 <= upper_d <= 32767 and \ - -32768 <= lower_d <= 32767 and \ - -32768 <= title_d <= 32767: - # use deltas - upper = upper_d & 0xffff - lower = lower_d & 0xffff - title = title_d & 0xffff + if sc is None and cf != [lower]: + sc = ([lower], [title], [upper]) + if sc is None: + if upper == lower == title: + upper = lower = title = 0 + else: + upper = upper - char + lower = lower - char + title = title - char + assert (abs(upper) <= 2147483647 and + abs(lower) <= 2147483647 and + abs(title) <= 2147483647) else: - flags |= NODELTA_MASK + # This happens either when some character maps to more than one + # character in uppercase, lowercase, or titlecase or the + # casefolded version of the character is different from the + # lowercase. The extra characters are stored in a different + # array. + flags |= EXTENDED_CASE_MASK + lower = len(extra_casing) | (len(sc[0]) << 24) + extra_casing.extend(sc[0]) + if cf != sc[0]: + lower |= len(cf) << 20 + extra_casing.extend(cf) + upper = len(extra_casing) | (len(sc[2]) << 24) + extra_casing.extend(sc[2]) + # Title is probably equal to upper. 
+ if sc[1] == sc[2]: + title = upper + else: + title = len(extra_casing) | (len(sc[1]) << 24) + extra_casing.extend(sc[1]) # decimal digit, integer digit decimal = 0 if record[6]: @@ -452,6 +495,7 @@ def makeunicodetype(unicode, trace): print(sum(map(len, numeric.values())), "numeric code points") print(len(spaces), "whitespace code points") print(len(linebreaks), "linebreak code points") + print(len(extra_casing), "extended case array") print("--- Writing", FILE, "...") @@ -465,6 +509,14 @@ def makeunicodetype(unicode, trace): print("};", file=fp) print(file=fp) + print("/* extended case mappings */", file=fp) + print(file=fp) + print("const Py_UCS4 _PyUnicode_ExtendedCase[] = {", file=fp) + for c in extra_casing: + print(" %d," % c, file=fp) + print("};", file=fp) + print(file=fp) + # split decomposition index table index1, index2, shift = splitbins(index, trace) @@ -692,6 +744,39 @@ def makeunicodename(unicode, trace): print("/* name->code dictionary */", file=fp) codehash.dump(fp, trace) + print(file=fp) + print('static const unsigned int aliases_start = %#x;' % + NAME_ALIASES_START, file=fp) + print('static const unsigned int aliases_end = %#x;' % + (NAME_ALIASES_START + len(unicode.aliases)), file=fp) + + print('static const unsigned int name_aliases[] = {', file=fp) + for name, codepoint in unicode.aliases: + print(' 0x%04X,' % codepoint, file=fp) + print('};', file=fp) + + # In Unicode 6.0.0, the sequences contain at most 4 BMP chars, + # so we are using Py_UCS2 seq[4]. This needs to be updated if longer + # sequences or sequences with non-BMP chars are added. + # unicodedata_lookup should be adapted too. + print(dedent(""" + typedef struct NamedSequence { + int seqlen; + Py_UCS2 seq[4]; + } named_sequence; + """), file=fp) + + print('static const unsigned int named_sequences_start = %#x;' % + NAMED_SEQUENCES_START, file=fp) + print('static const unsigned int named_sequences_end = %#x;' % + (NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=fp) + + print('static const named_sequence named_sequences[] = {', file=fp) + for name, sequence in unicode.named_sequences: + seq_str = ', '.join('0x%04X' % cp for cp in sequence) + print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp) + print('};', file=fp) + fp.close() @@ -726,7 +811,11 @@ def merge_old_version(version, new, old): for k in range(len(old.table[i])): if old.table[i][k] != new.table[i][k]: value = old.table[i][k] - if k == 2: + if k == 1 and i in PUA_15: + # the name is not set in the old.table, but in the + # new.table we are using it for aliases and named seq + assert value == '' + elif k == 2: #print "CATEGORY",hex(i), old.table[i][k], new.table[i][k] category_changes[i] = CATEGORY_NAMES.index(value) elif k == 4: @@ -816,15 +905,15 @@ class UnicodeData: expand=1, cjk_check=True): self.changed = [] - file = open_data(UNICODE_DATA, version) table = [None] * 0x110000 - while 1: - s = file.readline() - if not s: - break - s = s.strip().split(";") - char = int(s[0], 16) - table[char] = s + with open_data(UNICODE_DATA, version) as file: + while 1: + s = file.readline() + if not s: + break + s = s.strip().split(";") + char = int(s[0], 16) + table[char] = s cjk_ranges_found = [] @@ -855,32 +944,79 @@ class UnicodeData: self.table = table self.chars = list(range(0x110000)) # unicode 3.2 - file = open_data(COMPOSITION_EXCLUSIONS, version) + # check for name aliases and named sequences, see #12753 + # aliases and named sequences are not in 3.2.0 + if version != '3.2.0': + self.aliases = [] + # store aliases in the Private 
Use Area 15, in range U+F0000..U+F00FF, + # in order to take advantage of the compression and lookup + # algorithms used for the other characters + pua_index = NAME_ALIASES_START + with open_data(NAME_ALIASES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + char, name, abbrev = s.split(';') + char = int(char, 16) + self.aliases.append((name, char)) + # also store the name in the PUA 1 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAME_ALIASES_START == len(self.aliases) + + self.named_sequences = [] + # store named seqences in the PUA 1, in range U+F0100.., + # in order to take advantage of the compression and lookup + # algorithms used for the other characters. + + assert pua_index < NAMED_SEQUENCES_START + pua_index = NAMED_SEQUENCES_START + with open_data(NAMED_SEQUENCES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + name, chars = s.split(';') + chars = tuple(int(char, 16) for char in chars.split()) + # check that the structure defined in makeunicodename is OK + assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size" + assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in " + "the NamedSequence struct and in unicodedata_lookup") + self.named_sequences.append((name, chars)) + # also store these in the PUA 1 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences) + self.exclusions = {} - for s in file: - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - char = int(s.split()[0],16) - self.exclusions[char] = 1 + with open_data(COMPOSITION_EXCLUSIONS, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + char = int(s.split()[0],16) + self.exclusions[char] = 1 widths = [None] * 0x110000 - for s in open_data(EASTASIAN_WIDTH, version): - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - s = s.split()[0].split(';') - if '..' in s[0]: - first, last = [int(c, 16) for c in s[0].split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(s[0], 16)] - for char in chars: - widths[char] = s[1] + with open_data(EASTASIAN_WIDTH, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + s = s.split()[0].split(';') + if '..' in s[0]: + first, last = [int(c, 16) for c in s[0].split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(s[0], 16)] + for char in chars: + widths[char] = s[1] + for i in range(0, 0x110000): if table[i] is not None: table[i].append(widths[i]) @@ -888,36 +1024,39 @@ class UnicodeData: for i in range(0, 0x110000): if table[i] is not None: table[i].append(set()) - for s in open_data(DERIVED_CORE_PROPERTIES, version): - s = s.split('#', 1)[0].strip() - if not s: - continue - r, p = s.split(";") - r = r.strip() - p = p.strip() - if ".." in r: - first, last = [int(c, 16) for c in r.split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(r, 16)] - for char in chars: - if table[char]: - # Some properties (e.g. Default_Ignorable_Code_Point) - # apply to unassigned code points; ignore them - table[char][-1].add(p) - - for s in open_data(LINE_BREAK, version): - s = s.partition('#')[0] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: - continue - if '..' 
not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - table[char][-1].add('Line_Break') + with open_data(DERIVED_CORE_PROPERTIES, version) as file: + for s in file: + s = s.split('#', 1)[0].strip() + if not s: + continue + + r, p = s.split(";") + r = r.strip() + p = p.strip() + if ".." in r: + first, last = [int(c, 16) for c in r.split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(r, 16)] + for char in chars: + if table[char]: + # Some properties (e.g. Default_Ignorable_Code_Point) + # apply to unassigned code points; ignore them + table[char][-1].add(p) + + with open_data(LINE_BREAK, version) as file: + for s in file: + s = s.partition('#')[0] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: + continue + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + table[char][-1].add('Line_Break') # We only want the quickcheck properties # Format: NF?_QC; Y(es)/N(o)/M(aybe) @@ -928,31 +1067,33 @@ class UnicodeData: # for older versions, and no delta records will be created. quickchecks = [0] * 0x110000 qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split() - for s in open_data(DERIVEDNORMALIZATION_PROPS, version): - if '#' in s: - s = s[:s.index('#')] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in qc_order: - continue - quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No - quickcheck_shift = qc_order.index(s[1])*2 - quickcheck <<= quickcheck_shift - if '..' not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - assert not (quickchecks[char]>>quickcheck_shift)&3 - quickchecks[char] |= quickcheck + with open_data(DERIVEDNORMALIZATION_PROPS, version) as file: + for s in file: + if '#' in s: + s = s[:s.index('#')] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in qc_order: + continue + quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No + quickcheck_shift = qc_order.index(s[1])*2 + quickcheck <<= quickcheck_shift + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + assert not (quickchecks[char]>>quickcheck_shift)&3 + quickchecks[char] |= quickcheck for i in range(0, 0x110000): if table[i] is not None: table[i].append(quickchecks[i]) - zip = zipfile.ZipFile(open_data(UNIHAN, version)) - if version == '3.2.0': - data = zip.open('Unihan-3.2.0.txt').read() - else: - data = zip.open('Unihan_NumericValues.txt').read() + with open_data(UNIHAN, version) as file: + zip = zipfile.ZipFile(file) + if version == '3.2.0': + data = zip.open('Unihan-3.2.0.txt').read() + else: + data = zip.open('Unihan_NumericValues.txt').read() for line in data.decode("utf-8").splitlines(): if not line.startswith('U+'): continue @@ -965,6 +1106,34 @@ class UnicodeData: # Patch the numeric field if table[i] is not None: table[i][8] = value + sc = self.special_casing = {} + with open_data(SPECIAL_CASING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[4]: + # We ignore all conditionals (since they depend on + # languages) except for one, which is hardcoded. See + # handle_capital_sigma in unicodeobject.c. 
+                    continue
+                c = int(data[0], 16)
+                lower = [int(char, 16) for char in data[1].split()]
+                title = [int(char, 16) for char in data[2].split()]
+                upper = [int(char, 16) for char in data[3].split()]
+                sc[c] = (lower, title, upper)
+        cf = self.case_folding = {}
+        if version != '3.2.0':
+            with open_data(CASE_FOLDING, version) as file:
+                for s in file:
+                    s = s[:-1].split('#', 1)[0]
+                    if not s:
+                        continue
+                    data = s.split("; ")
+                    if data[1] in "CF":
+                        c = int(data[0], 16)
+                        cf[c] = [int(char, 16) for char in data[2].split()]
 
     def uselatin1(self):
         # restrict character range to ISO Latin 1
diff --git a/Tools/unittestgui/unittestgui.py b/Tools/unittestgui/unittestgui.py
index b526646..09a20e2 100644
--- a/Tools/unittestgui/unittestgui.py
+++ b/Tools/unittestgui/unittestgui.py
@@ -28,7 +28,6 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 """
 __author__ = "Steve Purcell (stephen_purcell@yahoo.com)"
-__version__ = "$Revision: 1.7 $"[11:-2]
 
 import sys
 import traceback
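A closing note on the extended-case encoding that makeunicodedata.py gains above: when a character is flagged with EXTENDED_CASE_MASK, its lower/upper/title fields no longer hold deltas but a packed reference into _PyUnicode_ExtendedCase -- the start index in the low bits, the mapping length shifted into bits 24 and up, and, in the lower field only, the casefold length in bits 20-23 when the casefold differs from the lowercase mapping. A hypothetical decoder (not part of the patch) that inverts the packing done in makeunicodetype():

def unpack_extended_case(field):
    # Assumes index < 2**20, mapping length < 256 and casefold length < 16,
    # which is what the generator above can emit.
    index = field & 0xFFFFF         # start index into _PyUnicode_ExtendedCase
    fold_len = (field >> 20) & 0xF  # 0 means the casefold reuses the lowercase
    map_len = (field >> 24) & 0xFF  # number of code points in the mapping
    return index, map_len, fold_len

For example, U+00DF (the German sharp s) uppercases to "SS", two code points, so its upper field packs start_index | (2 << 24).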