Diffstat (limited to 'Tools')
63 files changed, 3447 insertions, 799 deletions
diff --git a/Tools/README b/Tools/README index c1f89ba..0d961de 100644 --- a/Tools/README +++ b/Tools/README @@ -3,7 +3,7 @@ while building or extending Python. buildbot Batchfiles for running on Windows buildslaves. -ccbench A Python concurrency benchmark. +ccbench A Python threads-based concurrency benchmark. (*) demo Several Python programming demos. @@ -17,13 +17,13 @@ i18n Tools for internationalization. pygettext.py and msgfmt.py generates a binary message catalog from a catalog in text format. -iobench Benchmark for the new Python I/O system. +iobench Benchmark for the new Python I/O system. (*) msi Support for packaging Python as an MSI package on Windows. parser Un-parsing tool to generate code from an AST. -pybench Comprehensive Python benchmarking suite. +pybench Low-level benchmarking for the Python evaluation loop. (*) pynche A Tkinter-based color editor. @@ -32,6 +32,9 @@ scripts A number of useful single-file programs, e.g. tabnanny.py tabs and spaces, and 2to3, which converts Python 2 code to Python 3 code. +stringbench A suite of micro-benchmarks for various operations on + strings (both 8-bit and unicode). (*) + test2to3 A demonstration of how to use 2to3 transparently in setup.py. unicode Tools for generating unicodedata and codecs from unicode.org @@ -40,3 +43,6 @@ unicode Tools for generating unicodedata and codecs from unicode.org unittestgui A Tkinter based GUI test runner for unittest, with test discovery. + + +(*) A generic benchmark suite is maintained separately at http://hg.python.org/benchmarks/ diff --git a/Tools/buildbot/build-amd64.bat b/Tools/buildbot/build-amd64.bat index 8713b38..493e74d 100644 --- a/Tools/buildbot/build-amd64.bat +++ b/Tools/buildbot/build-amd64.bat @@ -1,7 +1,6 @@ @rem Used by the buildbot "compile" step. -set HOST_PYTHON="%CD%\PCbuild\amd64\python_d.exe" cmd /c Tools\buildbot\external-amd64.bat -call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 +call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 cmd /c Tools\buildbot\clean-amd64.bat -vcbuild /useenv PCbuild\kill_python.vcproj "Debug|x64" && PCbuild\amd64\kill_python_d.exe -vcbuild PCbuild\pcbuild.sln "Debug|x64" + +msbuild PCbuild\pcbuild.sln /p:Configuration=Debug /p:Platform=x64 diff --git a/Tools/buildbot/build.bat b/Tools/buildbot/build.bat index ab3ea7d..be79b10 100644 --- a/Tools/buildbot/build.bat +++ b/Tools/buildbot/build.bat @@ -1,7 +1,7 @@ @rem Used by the buildbot "compile" step. 
cmd /c Tools\buildbot\external.bat -call "%VS90COMNTOOLS%vsvars32.bat" +call "%VS100COMNTOOLS%vsvars32.bat" cmd /c Tools\buildbot\clean.bat -vcbuild /useenv PCbuild\kill_python.vcproj "Debug|Win32" && PCbuild\kill_python_d.exe -vcbuild /useenv PCbuild\pcbuild.sln "Debug|Win32" + +msbuild PCbuild\pcbuild.sln /p:Configuration=Debug /p:Platform=Win32 diff --git a/Tools/buildbot/buildmsi.bat b/Tools/buildbot/buildmsi.bat index 4430cb8..ae93e67 100644 --- a/Tools/buildbot/buildmsi.bat +++ b/Tools/buildbot/buildmsi.bat @@ -2,10 +2,10 @@ cmd /c Tools\buildbot\external.bat @rem build release versions of things -call "%VS90COMNTOOLS%vsvars32.bat" +call "%VS100COMNTOOLS%vsvars32.bat" @rem build Python -vcbuild /useenv PCbuild\pcbuild.sln "Release|Win32" +msbuild /p:useenv=true PCbuild\pcbuild.sln /p:Configuration=Release /p:Platform=Win32 @rem build the documentation bash.exe -c 'cd Doc;make PYTHON=python2.5 update htmlhelp' diff --git a/Tools/buildbot/clean-amd64.bat b/Tools/buildbot/clean-amd64.bat index 715805a..24660af 100644 --- a/Tools/buildbot/clean-amd64.bat +++ b/Tools/buildbot/clean-amd64.bat @@ -1,10 +1,10 @@ @rem Used by the buildbot "clean" step. -call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 +call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 @echo Deleting .pyc/.pyo files ... del /s Lib\*.pyc Lib\*.pyo @echo Deleting test leftovers ... rmdir /s /q build cd PCbuild -vcbuild /clean pcbuild.sln "Release|x64" -vcbuild /clean pcbuild.sln "Debug|x64" +msbuild /target:clean pcbuild.sln /p:Configuration=Release /p:PlatformTarget=x64 +msbuild /target:clean pcbuild.sln /p:Configuration=Debug /p:PlatformTarget=x64 cd .. diff --git a/Tools/buildbot/clean.bat b/Tools/buildbot/clean.bat index 0c04b8e..218facc 100644 --- a/Tools/buildbot/clean.bat +++ b/Tools/buildbot/clean.bat @@ -1,8 +1,8 @@ @rem Used by the buildbot "clean" step. -call "%VS90COMNTOOLS%vsvars32.bat" +call "%VS100COMNTOOLS%vsvars32.bat" @echo Deleting test leftovers ... rmdir /s /q build cd PCbuild -vcbuild /clean pcbuild.sln "Release|Win32" -vcbuild /clean pcbuild.sln "Debug|Win32" +msbuild /target:clean pcbuild.sln /p:Configuration=Release /p:PlatformTarget=x86 +msbuild /target:clean pcbuild.sln /p:Configuration=Debug /p:PlatformTarget=x86 cd .. diff --git a/Tools/buildbot/external-amd64.bat b/Tools/buildbot/external-amd64.bat index 954238e..d2ff255 100644 --- a/Tools/buildbot/external-amd64.bat +++ b/Tools/buildbot/external-amd64.bat @@ -2,20 +2,20 @@ @rem Assume we start inside the Python source directory call "Tools\buildbot\external-common.bat" -call "%VS90COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 +call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 if not exist tcltk64\bin\tcl85g.dll ( - cd tcl-8.5.9.0\win + cd tcl-8.5.11.0\win nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 clean all nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 install cd ..\.. 
) if not exist tcltk64\bin\tk85g.dll ( - cd tk-8.5.9.0\win - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.9.0 clean - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.9.0 all - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.9.0 install + cd tk-8.5.11.0\win + nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 clean + nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 all + nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 install cd ..\.. ) diff --git a/Tools/buildbot/external-common.bat b/Tools/buildbot/external-common.bat index f031c1c..c6278b2 100644 --- a/Tools/buildbot/external-common.bat +++ b/Tools/buildbot/external-common.bat @@ -14,8 +14,8 @@ cd .. @rem if exist tk8.4.16 rd /s/q tk8.4.16 @rem if exist tk-8.4.18.1 rd /s/q tk-8.4.18.1 @rem if exist db-4.4.20 rd /s/q db-4.4.20 -@rem if exist openssl-1.0.0k rd /s/q openssl-1.0.0k -@rem if exist sqlite-3.7.4 rd /s/q sqlite-3.7.4 +@rem if exist openssl-1.0.1e rd /s/q openssl-1.0.1e +@rem if exist sqlite-3.7.12 rd /s/q sqlite-3.7.12 @rem bzip if not exist bzip2-1.0.6 ( @@ -24,17 +24,25 @@ if not exist bzip2-1.0.6 ( ) @rem OpenSSL -if not exist openssl-1.0.0k svn export http://svn.python.org/projects/external/openssl-1.0.0k +if not exist openssl-1.0.1e ( + rd /s/q openssl-1.0.1d + svn export http://svn.python.org/projects/external/openssl-1.0.1e +) @rem tcl/tk -if not exist tcl-8.5.9.0 ( +if not exist tcl-8.5.11.0 ( rd /s/q tcltk tcltk64 - svn export http://svn.python.org/projects/external/tcl-8.5.9.0 + svn export http://svn.python.org/projects/external/tcl-8.5.11.0 ) -if not exist tk-8.5.9.0 svn export http://svn.python.org/projects/external/tk-8.5.9.0 +if not exist tk-8.5.11.0 svn export http://svn.python.org/projects/external/tk-8.5.11.0 @rem sqlite3 -if not exist sqlite-3.7.4 ( - rd /s/q sqlite-source-3.6.21 - svn export http://svn.python.org/projects/external/sqlite-3.7.4 +if not exist sqlite-3.7.12 ( + rd /s/q sqlite-source-3.7.4 + svn export http://svn.python.org/projects/external/sqlite-3.7.12 +) + +@rem lzma +if not exist xz-5.0.3 ( + svn export http://svn.python.org/projects/external/xz-5.0.3 ) diff --git a/Tools/buildbot/external.bat b/Tools/buildbot/external.bat index e958fd6..ed5c10e 100644 --- a/Tools/buildbot/external.bat +++ b/Tools/buildbot/external.bat @@ -2,20 +2,20 @@ @rem Assume we start inside the Python source directory call "Tools\buildbot\external-common.bat" -call "%VS90COMNTOOLS%\vsvars32.bat" +call "%VS100COMNTOOLS%\vsvars32.bat" if not exist tcltk\bin\tcl85g.dll ( @rem all and install need to be separate invocations, otherwise nmakehlp is not found on install - cd tcl-8.5.9.0\win + cd tcl-8.5.11.0\win nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 INSTALLDIR=..\..\tcltk clean all nmake -f makefile.vc DEBUG=1 INSTALLDIR=..\..\tcltk install cd ..\.. 
) if not exist tcltk\bin\tk85g.dll ( - cd tk-8.5.9.0\win - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.9.0 clean - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.9.0 all - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.9.0 install + cd tk-8.5.11.0\win + nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 clean + nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 all + nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 install cd ..\.. ) diff --git a/Tools/ccbench/ccbench.py b/Tools/ccbench/ccbench.py index 9f7118f..60cec3e 100644 --- a/Tools/ccbench/ccbench.py +++ b/Tools/ccbench/ccbench.py @@ -10,7 +10,6 @@ ccbench, a Python concurrency benchmark. import time import os import sys -import functools import itertools import threading import subprocess @@ -435,70 +434,70 @@ def run_bandwidth_client(**kwargs): def run_bandwidth_test(func, args, nthreads): # Create a listening socket to receive the packets. We use UDP which should # be painlessly cross-platform. - sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - sock.bind(("127.0.0.1", 0)) - addr = sock.getsockname() + with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock: + sock.bind(("127.0.0.1", 0)) + addr = sock.getsockname() - duration = BANDWIDTH_DURATION - packet_size = BANDWIDTH_PACKET_SIZE - - results = [] - threads = [] - end_event = [] - start_cond = threading.Condition() - started = False - if nthreads > 0: - # Warm up - func(*args) + duration = BANDWIDTH_DURATION + packet_size = BANDWIDTH_PACKET_SIZE results = [] - loop = TimedLoop(func, args) - ready = [] - ready_cond = threading.Condition() - - def run(): + threads = [] + end_event = [] + start_cond = threading.Condition() + started = False + if nthreads > 0: + # Warm up + func(*args) + + results = [] + loop = TimedLoop(func, args) + ready = [] + ready_cond = threading.Condition() + + def run(): + with ready_cond: + ready.append(None) + ready_cond.notify() + with start_cond: + while not started: + start_cond.wait() + loop(start_time, duration * 1.5, end_event, do_yield=False) + + for i in range(nthreads): + threads.append(threading.Thread(target=run)) + for t in threads: + t.setDaemon(True) + t.start() + # Wait for threads to be ready with ready_cond: - ready.append(None) - ready_cond.notify() - with start_cond: - while not started: - start_cond.wait() - loop(start_time, duration * 1.5, end_event, do_yield=False) - - for i in range(nthreads): - threads.append(threading.Thread(target=run)) - for t in threads: - t.setDaemon(True) - t.start() - # Wait for threads to be ready - with ready_cond: - while len(ready) < nthreads: - ready_cond.wait() - - # Run the client and wait for the first packet to arrive before - # unblocking the background threads. 
- process = run_bandwidth_client(addr=addr, - packet_size=packet_size, - duration=duration) - _time = time.time - # This will also wait for the parent to be ready - s = _recv(sock, packet_size) - remote_addr = eval(s.partition('#')[0]) - - with start_cond: - start_time = _time() - started = True - start_cond.notify(nthreads) - - n = 0 - first_time = None - while not end_event and BW_END not in s: - _sendto(sock, s, remote_addr) + while len(ready) < nthreads: + ready_cond.wait() + + # Run the client and wait for the first packet to arrive before + # unblocking the background threads. + process = run_bandwidth_client(addr=addr, + packet_size=packet_size, + duration=duration) + _time = time.time + # This will also wait for the parent to be ready s = _recv(sock, packet_size) - if first_time is None: - first_time = _time() - n += 1 - end_time = _time() + remote_addr = eval(s.partition('#')[0]) + + with start_cond: + start_time = _time() + started = True + start_cond.notify(nthreads) + + n = 0 + first_time = None + while not end_event and BW_END not in s: + _sendto(sock, s, remote_addr) + s = _recv(sock, packet_size) + if first_time is None: + first_time = _time() + n += 1 + end_time = _time() end_event.append(None) for t in threads: diff --git a/Tools/demo/eiffel.py b/Tools/demo/eiffel.py index 3a28224..736abea 100755 --- a/Tools/demo/eiffel.py +++ b/Tools/demo/eiffel.py @@ -36,7 +36,7 @@ class EiffelBaseMetaClass(type): pre = dict.get("%s_pre" % m) post = dict.get("%s_post" % m) if pre or post: - dict[k] = cls.make_eiffel_method(dict[m], pre, post) + dict[m] = cls.make_eiffel_method(dict[m], pre, post) class EiffelMetaClass1(EiffelBaseMetaClass): diff --git a/Tools/demo/life.py b/Tools/demo/life.py index dfb9ab8..fc4cb49 100755 --- a/Tools/demo/life.py +++ b/Tools/demo/life.py @@ -46,38 +46,38 @@ class LifeBoard: self.state = {} self.scr = scr Y, X = self.scr.getmaxyx() - self.X, self.Y = X-2, Y-2-1 + self.X, self.Y = X - 2, Y - 2 - 1 self.char = char self.scr.clear() # Draw a border around the board - border_line = '+'+(self.X*'-')+'+' + border_line = '+' + (self.X * '-') + '+' self.scr.addstr(0, 0, border_line) - self.scr.addstr(self.Y+1, 0, border_line) + self.scr.addstr(self.Y + 1, 0, border_line) for y in range(0, self.Y): - self.scr.addstr(1+y, 0, '|') - self.scr.addstr(1+y, self.X+1, '|') + self.scr.addstr(1 + y, 0, '|') + self.scr.addstr(1 + y, self.X + 1, '|') self.scr.refresh() def set(self, y, x): """Set a cell to the live state""" - if x<0 or self.X<=x or y<0 or self.Y<=y: - raise ValueError("Coordinates out of range %i,%i"% (y, x)) - self.state[x,y] = 1 + if x < 0 or self.X <= x or y < 0 or self.Y <= y: + raise ValueError("Coordinates out of range %i,%i" % (y, x)) + self.state[x, y] = 1 def toggle(self, y, x): """Toggle a cell's state between live and dead""" if x < 0 or self.X <= x or y < 0 or self.Y <= y: - raise ValueError("Coordinates out of range %i,%i"% (y, x)) + raise ValueError("Coordinates out of range %i,%i" % (y, x)) if (x, y) in self.state: del self.state[x, y] - self.scr.addch(y+1, x+1, ' ') + self.scr.addch(y + 1, x + 1, ' ') else: self.state[x, y] = 1 if curses.has_colors(): # Let's pick a random color! 
self.scr.attrset(curses.color_pair(random.randrange(1, 7))) - self.scr.addch(y+1, x+1, self.char) + self.scr.addch(y + 1, x + 1, self.char) self.scr.attrset(0) self.scr.refresh() @@ -88,43 +88,46 @@ class LifeBoard: def display(self, update_board=True): """Display the whole board, optionally computing one generation""" - M,N = self.X, self.Y + M, N = self.X, self.Y if not update_board: for i in range(0, M): for j in range(0, N): - if (i,j) in self.state: - self.scr.addch(j+1, i+1, self.char) + if (i, j) in self.state: + self.scr.addch(j + 1, i + 1, self.char) else: - self.scr.addch(j+1, i+1, ' ') + self.scr.addch(j + 1, i + 1, ' ') self.scr.refresh() return d = {} self.boring = 1 for i in range(0, M): - L = range( max(0, i-1), min(M, i+2) ) + L = range(max(0, i - 1), min(M, i + 2)) for j in range(0, N): s = 0 - live = (i,j) in self.state - for k in range( max(0, j-1), min(N, j+2) ): + live = (i, j) in self.state + for k in range(max(0, j - 1), min(N, j + 2)): for l in L: - if (l,k) in self.state: + if (l, k) in self.state: s += 1 s -= live if s == 3: # Birth - d[i,j] = 1 + d[i, j] = 1 if curses.has_colors(): # Let's pick a random color! self.scr.attrset(curses.color_pair( random.randrange(1, 7))) - self.scr.addch(j+1, i+1, self.char) + self.scr.addch(j + 1, i + 1, self.char) self.scr.attrset(0) - if not live: self.boring = 0 - elif s == 2 and live: d[i,j] = 1 # Survival + if not live: + self.boring = 0 + elif s == 2 and live: + # Survival + d[i, j] = 1 elif live: # Death - self.scr.addch(j+1, i+1, ' ') + self.scr.addch(j + 1, i + 1, ' ') self.boring = 0 self.state = d self.scr.refresh() @@ -135,16 +138,17 @@ class LifeBoard: for i in range(0, self.X): for j in range(0, self.Y): if random.random() > 0.5: - self.set(j,i) + self.set(j, i) def erase_menu(stdscr, menu_y): "Clear the space where the menu resides" stdscr.move(menu_y, 0) stdscr.clrtoeol() - stdscr.move(menu_y+1, 0) + stdscr.move(menu_y + 1, 0) stdscr.clrtoeol() + def display_menu(stdscr, menu_y): "Display the menu of possible keystroke commands" erase_menu(stdscr, menu_y) @@ -154,15 +158,16 @@ def display_menu(stdscr, menu_y): stdscr.attrset(curses.color_pair(1)) stdscr.addstr(menu_y, 4, 'Use the cursor keys to move, and space or Enter to toggle a cell.') - stdscr.addstr(menu_y+1, 4, + stdscr.addstr(menu_y + 1, 4, 'E)rase the board, R)andom fill, S)tep once or C)ontinuously, Q)uit') stdscr.attrset(0) + def keyloop(stdscr): # Clear the screen and display the menu of keys stdscr.clear() stdscr_y, stdscr_x = stdscr.getmaxyx() - menu_y = (stdscr_y-3)-1 + menu_y = (stdscr_y - 3) - 1 display_menu(stdscr, menu_y) # If color, then initialize the color pairs @@ -179,16 +184,16 @@ def keyloop(stdscr): curses.mousemask(curses.BUTTON1_CLICKED) # Allocate a subwindow for the Life board and create the board object - subwin = stdscr.subwin(stdscr_y-3, stdscr_x, 0, 0) + subwin = stdscr.subwin(stdscr_y - 3, stdscr_x, 0, 0) board = LifeBoard(subwin, char=ord('*')) board.display(update_board=False) # xpos, ypos are the cursor's position - xpos, ypos = board.X//2, board.Y//2 + xpos, ypos = board.X // 2, board.Y // 2 # Main loop: while True: - stdscr.move(1+ypos, 1+xpos) # Move the cursor + stdscr.move(1 + ypos, 1 + xpos) # Move the cursor c = stdscr.getch() # Get a keystroke if 0 < c < 256: c = chr(c) @@ -224,15 +229,21 @@ def keyloop(stdscr): board.display(update_board=False) elif c in 'Ss': board.display() - else: pass # Ignore incorrect keys - elif c == curses.KEY_UP and ypos > 0: ypos -= 1 - elif c == curses.KEY_DOWN and ypos < board.Y-1: ypos += 1 - elif c == curses.KEY_LEFT and xpos > 0: xpos -= 1 - elif c == curses.KEY_RIGHT and xpos < board.X-1: xpos += 1 + else: + # Ignore incorrect keys + pass + elif c == curses.KEY_UP and ypos > 0: + ypos -= 1 + elif c == curses.KEY_DOWN and ypos + 1 < board.Y: + ypos += 1 + elif c == curses.KEY_LEFT and xpos > 0: + xpos -= 1 + elif c == curses.KEY_RIGHT and xpos + 1 < board.X: + xpos += 1 elif c == curses.KEY_MOUSE: mouse_id, mouse_x, mouse_y, mouse_z, button_state = curses.getmouse() - if (mouse_x > 0 and mouse_x < board.X+1 and - mouse_y > 0 and mouse_y < board.Y+1): + if (mouse_x > 0 and mouse_x < board.X + 1 and + mouse_y > 0 and mouse_y < board.Y + 1): xpos = mouse_x - 1 ypos = mouse_y - 1 board.toggle(ypos, xpos) @@ -245,7 +256,7 @@ def keyloop(stdscr): def main(stdscr): - keyloop(stdscr) # Enter the main loop + keyloop(stdscr) # Enter the main loop if __name__ == '__main__': curses.wrapper(main) diff --git a/Tools/demo/ss1.py b/Tools/demo/ss1.py index 4cea667..649790f 100755 --- a/Tools/demo/ss1.py +++ b/Tools/demo/ss1.py @@ -7,8 +7,8 @@ SS1 -- a spreadsheet-like application. import os import re import sys -import html from xml.parsers import expat +from xml.sax.saxutils import escape LEFT, CENTER, RIGHT = "LEFT", "CENTER", "RIGHT" @@ -79,10 +79,10 @@ class Sheet: del self.cells[xy] def clearrows(self, y1, y2): - self.clearcells(0, y1, sys.maxint, y2) + self.clearcells(0, y1, sys.maxsize, y2) def clearcolumns(self, x1, x2): - self.clearcells(x1, 0, x2, sys.maxint) + self.clearcells(x1, 0, x2, sys.maxsize) def selectcells(self, x1, y1, x2, y2): if x1 > x2: @@ -113,23 +113,23 @@ class Sheet: def insertrows(self, y, n): assert n > 0 - self.movecells(0, y, sys.maxint, sys.maxint, 0, n) + self.movecells(0, y, sys.maxsize, sys.maxsize, 0, n) def deleterows(self, y1, y2): if y1 > y2: y1, y2 = y2, y1 self.clearrows(y1, y2) - self.movecells(0, y2+1, sys.maxint, sys.maxint, 0, y1-y2-1) + self.movecells(0, y2+1, sys.maxsize, sys.maxsize, 0, y1-y2-1) def insertcolumns(self, x, n): assert n > 0 - self.movecells(x, 0, sys.maxint, sys.maxint, n, 0) + self.movecells(x, 0, sys.maxsize, sys.maxsize, n, 0) def deletecolumns(self, x1, x2): if x1 > x2: x1, x2 = x2, x1 self.clearcells(x1, x2) - self.movecells(x2+1, 0, sys.maxint, sys.maxint, x1-x2-1, 0) + self.movecells(x2+1, 0, sys.maxsize, sys.maxsize, x1-x2-1, 0) def getsize(self): maxx = maxy = 0 @@ -205,7 +205,7 @@ class Sheet: if hasattr(cell, 'xml'): cellxml = cell.xml() else: - cellxml = '<value>%s</value>' % html.escape(cell) + cellxml = '<value>%s</value>' % escape(cell) out.append('<cell row="%s" col="%s">\n %s\n</cell>' % (y, x, cellxml)) out.append('</spreadsheet>') @@ -213,16 +213,14 @@ class Sheet: def save(self, filename): text = self.xml() - f = open(filename, "w") - f.write(text) - if text and not text.endswith('\n'): - f.write('\n') - f.close() + with open(filename, "w", encoding='utf-8') as f: + f.write(text) + if text and not text.endswith('\n'): + f.write('\n') def load(self, filename): - f = open(filename, 'rb') - SheetParser(self).parsefile(f) - f.close() + with open(filename, 'rb') as f: + SheetParser(self).parsefile(f) class SheetParser: @@ -239,13 +237,10 @@ class SheetParser: def startelement(self, tag, attrs): method = getattr(self, 'start_'+tag, None) if method: - for key, value in attrs.items(): - attrs[key] = str(value) # XXX Convert Unicode to 8-bit method(attrs) self.texts = [] def data(self, text): - text = str(text) # XXX Convert Unicode to 8-bit self.texts.append(text) def endelement(self, tag): @@ -269,11 +264,7 @@ class SheetParser: except: self.value = None - def end_long(self, text): - try: - self.value = int(text) - except: - self.value = None + end_long = end_int def end_double(self, text): try: @@ -288,10 +279,7 @@ class SheetParser: self.value = None def end_string(self, text): - try: - self.value = text - except: - self.value = None + self.value = text def end_value(self, text): if isinstance(self.value, BaseCell): @@ -328,7 +316,7 @@ class BaseCell: class NumericCell(BaseCell): def __init__(self, value, fmt="%s", alignment=RIGHT): - assert isinstance(value, (int, int, float, complex)) + assert isinstance(value, (int, float, complex)) assert alignment in (LEFT, CENTER, RIGHT) self.value = value self.fmt = fmt @@ -355,21 +343,18 @@ class NumericCell(BaseCell): if -2**31 <= self.value < 2**31: return '<int>%s</int>' % self.value else: - return self._xml_long() - - def _xml_long(self): - return '<long>%s</long>' % self.value + return '<long>%s</long>' % self.value def _xml_float(self): - return '<double>%s</double>' % repr(self.value) + return '<double>%r</double>' % self.value def _xml_complex(self): - return '<complex>%s</double>' % repr(self.value) + return '<complex>%r</complex>' % self.value class StringCell(BaseCell): def __init__(self, text, fmt="%s", alignment=LEFT): - assert isinstance(text, (str, str)) + assert isinstance(text, str) assert alignment in (LEFT, CENTER, RIGHT) self.text = text self.fmt = fmt @@ -386,7 +371,7 @@ class StringCell(BaseCell): return s % ( align2xml[self.alignment], self.fmt, - html.escape(self.text)) + escape(self.text)) class FormulaCell(BaseCell): @@ -404,7 +389,6 @@ class FormulaCell(BaseCell): def recalc(self, ns): if self.value is None: try: - # A hack to evaluate expressions using true division self.value = eval(self.translated, ns) except: exc = sys.exc_info()[0] @@ -425,7 +409,7 @@ class FormulaCell(BaseCell): return '<formula align="%s" format="%s">%s</formula>' % ( align2xml[self.alignment], self.fmt, - self.formula) + escape(self.formula)) def renumber(self, x1, y1, x2, y2, dx, dy): out = [] @@ -626,29 +610,29 @@ class SheetGUI: def selectall(self, event): self.setcurrent(1, 1) - self.setcorner(sys.maxint, sys.maxint) + self.setcorner(sys.maxsize, sys.maxsize) def selectcolumn(self, event): x, y = self.whichxy(event) self.setcurrent(x, 1) - self.setcorner(x, sys.maxint) + self.setcorner(x, sys.maxsize) def extendcolumn(self, event): x, y = self.whichxy(event) if x > 0: self.setcurrent(self.currentxy[0], 1) - self.setcorner(x, sys.maxint) + self.setcorner(x, sys.maxsize) def selectrow(self, event): x, y = self.whichxy(event) self.setcurrent(1, y) - self.setcorner(sys.maxint, y) + self.setcorner(sys.maxsize, y) def extendrow(self, event): x, y = self.whichxy(event) if y > 0: self.setcurrent(1, self.currentxy[1]) - self.setcorner(sys.maxint, y) + self.setcorner(sys.maxsize, y) def press(self, event): x, y = self.whichxy(event) @@ -709,14 +693,14 @@ class SheetGUI: self.setbeacon(x1, y1, x2, y2) def setbeacon(self, x1, y1, x2, y2): - if x1 == y1 == 1 and x2 == y2 == sys.maxint: + if x1 == y1 == 1 and x2 == y2 == sys.maxsize: name = ":" - elif (x1, x2) == (1, sys.maxint): + elif (x1, x2) == (1, sys.maxsize): if y1 == y2: name = "%d" % y1 else: name = "%d:%d" % (y1, y2) - elif (y1, y2) == (1, sys.maxint): + elif (y1, y2) == (1, sys.maxsize): if x1 == x2: name = "%s" % colnum2name(x1) else: @@ -776,7 +760,7 @@ class SheetGUI: if text.startswith('='): cell = FormulaCell(text[1:]) else: - for cls in int, int, float, complex: + for cls in int, float, complex: try: value = cls(text) except: @@ -812,7 +796,6 @@ class SheetGUI: def test_basic(): "Basic non-gui self-test." - import os a = Sheet() for x in range(1, 11): for y in range(1, 11): diff --git a/Tools/freeze/checkextensions_win32.py b/Tools/freeze/checkextensions_win32.py index a41542f..ee446e7 100644 --- a/Tools/freeze/checkextensions_win32.py +++ b/Tools/freeze/checkextensions_win32.py @@ -3,7 +3,7 @@ Under Windows it is unlikely the .obj files are of use, as special compiler options are needed (primarily to toggle the behavior of "public" symbols. -I dont consider it worth parsing the MSVC makefiles for compiler options. Even if +I don't consider it worth parsing the MSVC makefiles for compiler options. Even if we get it just right, a specific freeze application may have specific compiler options anyway (eg, to enable or disable specific functionality) @@ -14,7 +14,7 @@ So my basic strategy is: your own). * This description can include: - The MSVC .dsp file for the extension. The .c source file names - are extraced from there. + are extracted from there. - Specific compiler/linker options - Flag to indicate if Unicode compilation is expected. diff --git a/Tools/freeze/makefreeze.py b/Tools/freeze/makefreeze.py index 4cd1e96..ef18ec7 100644 --- a/Tools/freeze/makefreeze.py +++ b/Tools/freeze/makefreeze.py @@ -61,7 +61,7 @@ def makefreeze(base, dict, debug=0, entry_point=None, fail_import=()): outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size)) outfp.write('\n') # The following modules have a NULL code pointer, indicating - # that the prozen program should not search for them on the host + # that the frozen program should not search for them on the host # system. Importing them will *always* raise an ImportError. # The zero value size is never used. for mod in fail_import: diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 8bbbb10..1c2c3cb 100644..100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -40,29 +40,44 @@ the type names are known to the debugger The module also extends gdb with some python-specific commands. ''' -from __future__ import with_statement + +# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax +# compatible (2.6+ and 3.0+). See #19308.
+ +from __future__ import print_function, with_statement import gdb +import os import locale import sys +if sys.version_info[0] >= 3: + unichr = chr + xrange = range + long = int + # Look up the gdb.Type for some standard types: _type_char_ptr = gdb.lookup_type('char').pointer() # char* _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char* _type_void_ptr = gdb.lookup_type('void').pointer() # void* +_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer() +_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer() + +# value computed later, see PyUnicodeObjectPtr.proxy() +_is_pep393 = None SIZEOF_VOID_P = _type_void_ptr.sizeof -Py_TPFLAGS_HEAPTYPE = (1L << 9) +Py_TPFLAGS_HEAPTYPE = (1 << 9) -Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) -Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) -Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) -Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27) -Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) -Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) -Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) -Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) +Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) +Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) +Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) +Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) +Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) +Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) +Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) +Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) MAX_OUTPUT_LEN=1024 @@ -85,38 +100,45 @@ def safety_limit(val): def safe_range(val): # As per range, but don't trust the value too much: cap it to a safety # threshold in case the data was corrupted - return xrange(safety_limit(val)) - -def write_unicode(file, text): - # Write a byte or unicode string to file. Unicode strings are encoded to - # ENCODING encoding with 'backslashreplace' error handler to avoid - # UnicodeEncodeError. - if isinstance(text, unicode): - text = text.encode(ENCODING, 'backslashreplace') - file.write(text) - -def os_fsencode(filename): - if not isinstance(filename, unicode): - return filename - encoding = sys.getfilesystemencoding() - if encoding == 'mbcs': - # mbcs doesn't support surrogateescape - return filename.encode(encoding) - encoded = [] - for char in filename: - # surrogateescape error handler - if 0xDC80 <= ord(char) <= 0xDCFF: - byte = chr(ord(char) - 0xDC00) - else: - byte = char.encode(encoding) - encoded.append(byte) - return ''.join(encoded) + return xrange(safety_limit(int(val))) + +if sys.version_info[0] >= 3: + def write_unicode(file, text): + file.write(text) +else: + def write_unicode(file, text): + # Write a byte or unicode string to file. Unicode strings are encoded to + # ENCODING encoding with 'backslashreplace' error handler to avoid + # UnicodeEncodeError. 
+ if isinstance(text, unicode): + text = text.encode(ENCODING, 'backslashreplace') + file.write(text) + +try: + os_fsencode = os.fsencode +except AttributeError: + def os_fsencode(filename): + if not isinstance(filename, unicode): + return filename + encoding = sys.getfilesystemencoding() + if encoding == 'mbcs': + # mbcs doesn't support surrogateescape + return filename.encode(encoding) + encoded = [] + for char in filename: + # surrogateescape error handler + if 0xDC80 <= ord(char) <= 0xDCFF: + byte = chr(ord(char) - 0xDC00) + else: + byte = char.encode(encoding) + encoded.append(byte) + return ''.join(encoded) class StringTruncated(RuntimeError): pass class TruncatedStringIO(object): - '''Similar to cStringIO, but can truncate the output by raising a + '''Similar to io.StringIO, but can truncate the output by raising a StringTruncated exception''' def __init__(self, maxlen=None): self._val = '' @@ -317,12 +339,11 @@ class PyObjectPtr(object): # class return cls - #print 'tp_flags = 0x%08x' % tp_flags - #print 'tp_name = %r' % tp_name + #print('tp_flags = 0x%08x' % tp_flags) + #print('tp_name = %r' % tp_name) name_map = {'bool': PyBoolObjectPtr, 'classobj': PyClassObjectPtr, - 'instance': PyInstanceObjectPtr, 'NoneType': PyNoneStructPtr, 'frame': PyFrameObjectPtr, 'set' : PySetObjectPtr, @@ -396,7 +417,7 @@ class ProxyAlreadyVisited(object): def _write_instance_repr(out, visited, name, pyop_attrdict, address): - '''Shared code for use by old-style and new-style classes: + '''Shared code for use by all classes: write a representation to file-like object "out"''' out.write('<') out.write(name) @@ -479,7 +500,7 @@ class HeapTypeObjectPtr(PyObjectPtr): def proxyval(self, visited): ''' - Support for new-style classes. + Support for classes. Currently we just locate the dictionary using a transliteration to python of _PyObject_GetDictPtr, ignoring descriptors @@ -496,7 +517,7 @@ class HeapTypeObjectPtr(PyObjectPtr): attr_dict = {} tp_name = self.safe_tp_name() - # New-style class: + # Class: return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) def write_repr(self, out, visited): @@ -628,11 +649,16 @@ class PyDictObjectPtr(PyObjectPtr): def iteritems(self): ''' Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, - analagous to dict.iteritems() + analogous to dict.iteritems() ''' - for i in safe_range(self.field('ma_mask') + 1): - ep = self.field('ma_table') + i - pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) + keys = self.field('ma_keys') + values = self.field('ma_values') + for i in safe_range(keys['dk_size']): + ep = keys['dk_entries'].address + i + if long(values): + pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) + else: + pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) if not pyop_value.is_null(): pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) yield (pyop_key, pyop_value) @@ -668,44 +694,6 @@ class PyDictObjectPtr(PyObjectPtr): pyop_value.write_repr(out, visited) out.write('}') -class PyInstanceObjectPtr(PyObjectPtr): - _typename = 'PyInstanceObject' - - def proxyval(self, visited): - # Guard against infinite loops: - if self.as_address() in visited: - return ProxyAlreadyVisited('<...>') - visited.add(self.as_address()) - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - in_dict = self.pyop_field('in_dict').proxyval(visited) - - # Old-style class: - return InstanceProxy(cl_name, in_dict, long(self._gdbval)) - - 
def write_repr(self, out, visited): - # Guard against infinite loops: - if self.as_address() in visited: - out.write('<...>') - return - visited.add(self.as_address()) - - # Old-style class: - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - pyop_in_dict = self.pyop_field('in_dict') - - _write_instance_repr(out, visited, - cl_name, pyop_in_dict, self.as_address()) - class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -762,14 +750,14 @@ class PyLongObjectPtr(PyObjectPtr): ''' ob_size = long(self.field('ob_size')) if ob_size == 0: - return 0L + return 0 ob_digit = self.field('ob_digit') if gdb.lookup_type('digit').sizeof == 2: - SHIFT = 15L + SHIFT = 15 else: - SHIFT = 30L + SHIFT = 30 digits = [long(ob_digit[i]) * 2**(SHIFT*i) for i in safe_range(abs(ob_size))] @@ -1123,15 +1111,46 @@ class PyUnicodeObjectPtr(PyObjectPtr): return _type_Py_UNICODE.sizeof def proxyval(self, visited): - # From unicodeobject.h: - # Py_ssize_t length; /* Length of raw Unicode data in buffer */ - # Py_UNICODE *str; /* Raw Unicode buffer */ - field_length = long(self.field('length')) - field_str = self.field('str') + global _is_pep393 + if _is_pep393 is None: + fields = gdb.lookup_type('PyUnicodeObject').target().fields() + _is_pep393 = 'data' in [f.name for f in fields] + if _is_pep393: + # Python 3.3 and newer + may_have_surrogates = False + compact = self.field('_base') + ascii = compact['_base'] + state = ascii['state'] + is_compact_ascii = (int(state['ascii']) and int(state['compact'])) + if not int(state['ready']): + # string is not ready + field_length = long(compact['wstr_length']) + may_have_surrogates = True + field_str = ascii['wstr'] + else: + field_length = long(ascii['length']) + if is_compact_ascii: + field_str = ascii.address + 1 + elif int(state['compact']): + field_str = compact.address + 1 + else: + field_str = self.field('data')['any'] + repr_kind = int(state['kind']) + if repr_kind == 1: + field_str = field_str.cast(_type_unsigned_char_ptr) + elif repr_kind == 2: + field_str = field_str.cast(_type_unsigned_short_ptr) + elif repr_kind == 4: + field_str = field_str.cast(_type_unsigned_int_ptr) + else: + # Python 3.2 and earlier + field_length = long(self.field('length')) + field_str = self.field('str') + may_have_surrogates = self.char_width() == 2 # Gather a list of ints from the Py_UNICODE array; these are either - # UCS-2 or UCS-4 code points: - if self.char_width() > 2: + # UCS-1, UCS-2 or UCS-4 code points: + if not may_have_surrogates: Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] else: # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the @@ -1330,7 +1349,7 @@ that this python file is installed to the same path as the library (or its /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py """ def register (obj): - if obj == None: + if obj is None: obj = gdb # Wire up the pretty-printer @@ -1388,6 +1407,23 @@ class Frame(object): iter_frame = iter_frame.newer() return index + # We divide frames into: + # - "python frames": + # - "bytecode frames" i.e. PyEval_EvalFrameEx + # - "other python frames": things that are of interest from a python + # POV, but aren't bytecode (e.g. GC, GIL) + # - everything else + + def is_python_frame(self): + '''Is this a PyEval_EvalFrameEx frame, or some other important + frame? 
(see is_other_python_frame for what "important" means in this context)''' + if self.is_evalframeex(): + return True + if self.is_other_python_frame(): + return True + return False + def is_evalframeex(self): '''Is this a PyEval_EvalFrameEx frame?''' if self._gdbframe.name() == 'PyEval_EvalFrameEx': @@ -1404,6 +1440,49 @@ class Frame(object): return False + def is_other_python_frame(self): + '''Is this frame worth displaying in python backtraces? + Examples: + - waiting on the GIL + - garbage-collecting + - within a CFunction + If it is, return a descriptive string + For other frames, return False + ''' + if self.is_waiting_for_gil(): + return 'Waiting for the GIL' + elif self.is_gc_collect(): + return 'Garbage-collecting' + else: + # Detect invocations of PyCFunction instances: + older = self.older() + if older and older._gdbframe.name() == 'PyCFunction_Call': + # Within that frame: + # "func" is the local containing the PyObject* of the + # PyCFunctionObject instance + # "f" is the same value, but cast to (PyCFunctionObject*) + # "self" is the (PyObject*) of the 'self' + try: + # Use the prettyprinter for the func: + func = older._gdbframe.read_var('func') + return str(func) + except RuntimeError: + return 'PyCFunction invocation (unable to read "func")' + + # This frame isn't worth reporting: + return False + + def is_waiting_for_gil(self): + '''Is this frame waiting on the GIL?''' + # This assumes the _POSIX_THREADS version of Python/ceval_gil.h: + name = self._gdbframe.name() + if name: + return 'pthread_cond_timedwait' in name + + def is_gc_collect(self): + '''Is this frame "collect" within the garbage-collector?''' + return self._gdbframe.name() == 'collect' + def get_pyop(self): try: f = self._gdbframe.read_var('f') @@ -1433,8 +1512,22 @@ class Frame(object): @classmethod def get_selected_python_frame(cls): - '''Try to obtain the Frame for the python code in the selected frame, - or None''' + '''Try to obtain the Frame for the python-related code in the selected + frame, or None''' + frame = cls.get_selected_frame() + + while frame: + if frame.is_python_frame(): + return frame + frame = frame.older() + + # Not found: + return None + + @classmethod + def get_selected_bytecode_frame(cls): + '''Try to obtain the Frame for the python bytecode interpreter in the + selected GDB frame, or None''' frame = cls.get_selected_frame() while frame: @@ -1458,7 +1551,11 @@ class Frame(object): else: sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) else: - sys.stdout.write('#%i\n' % self.get_index()) + info = self.is_other_python_frame() + if info: + sys.stdout.write('#%i %s\n' % (self.get_index(), info)) + else: + sys.stdout.write('#%i\n' % self.get_index()) def print_traceback(self): if self.is_evalframeex(): @@ -1472,7 +1569,11 @@ class Frame(object): else: sys.stdout.write(' (unable to read python frame information)\n') else: - sys.stdout.write(' (not a python frame)\n') + info = self.is_other_python_frame() + if info: + sys.stdout.write(' %s\n' % info) + else: + sys.stdout.write(' (not a python frame)\n') class PyList(gdb.Command): '''List the current Python source code, if any @@ -1508,14 +1609,15 @@ class PyList(gdb.Command): if m: start, end = map(int, m.groups()) - frame = Frame.get_selected_python_frame() + # py-list requires an actual PyEval_EvalFrameEx frame: + frame = Frame.get_selected_bytecode_frame() if not frame: - print 'Unable to locate python frame' + print('Unable to locate gdb frame for python bytecode interpreter') return pyop = frame.get_pyop() if not pyop or pyop.is_optimized_out(): - print 'Unable to read information on python frame' + print('Unable to read information on python frame') return filename = pyop.filename() @@ -1562,7 +1664,7 @@ def move_in_stack(move_up): if not iter_frame: break - if iter_frame.is_evalframeex(): + if iter_frame.is_python_frame(): # Result: if iter_frame.select(): iter_frame.print_summary() @@ -1571,9 +1673,9 @@ frame = iter_frame if move_up: - print 'Unable to find an older python frame' + print('Unable to find an older python frame') else: - print 'Unable to find a newer python frame' + print('Unable to find a newer python frame') class PyUp(gdb.Command): 'Select and print the python stack frame that called this one (if any)' @@ -1616,7 +1718,7 @@ class PyBacktraceFull(gdb.Command): def invoke(self, args, from_tty): frame = Frame.get_selected_python_frame() while frame: - if frame.is_evalframeex(): + if frame.is_python_frame(): frame.print_summary() frame = frame.older() @@ -1635,7 +1737,7 @@ class PyBacktrace(gdb.Command): sys.stdout.write('Traceback (most recent call first):\n') frame = Frame.get_selected_python_frame() while frame: - if frame.is_evalframeex(): + if frame.is_python_frame(): frame.print_traceback() frame = frame.older() @@ -1655,23 +1757,23 @@ class PyPrint(gdb.Command): frame = Frame.get_selected_python_frame() if not frame: - print 'Unable to locate python frame' + print('Unable to locate python frame') return pyop_frame = frame.get_pyop() if not pyop_frame: - print 'Unable to read information on python frame' + print('Unable to read information on python frame') return pyop_var, scope = pyop_frame.get_var_by_name(name) if pyop_var: - print ('%s %r = %s' + print('%s %r = %s' % (scope, name, pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) else: - print '%r not found' % name + print('%r not found' % name) PyPrint() @@ -1689,16 +1791,16 @@ class PyLocals(gdb.Command): frame = Frame.get_selected_python_frame() if not frame: - print 'Unable to locate python frame' + print('Unable to locate python frame') return pyop_frame = frame.get_pyop() if not pyop_frame: - print 'Unable to read information on python frame' + print('Unable to read information on python frame') return for pyop_name, pyop_value in pyop_frame.iter_locals(): - print ('%s = %s' + print('%s = %s' % (pyop_name.proxyval(set()), pyop_value.get_truncated_repr(MAX_OUTPUT_LEN))) diff --git a/Tools/hg/hgtouch.py b/Tools/hg/hgtouch.py new file mode 100644 index 0000000..119d812 --- /dev/null +++ b/Tools/hg/hgtouch.py @@ -0,0 +1,130 @@ +"""Bring time stamps of generated checked-in files into the right order + +A versioned configuration file .hgtouch specifies generated files, in the +syntax of make rules. + + output: input1 input2 + +In addition to the dependency syntax, #-comments are supported. +""" +from __future__ import with_statement +import errno +import os +import time + +def parse_config(repo): + try: + fp = repo.wfile(".hgtouch") + except IOError, e: + if e.errno != errno.ENOENT: + raise + return {} + result = {} + with fp: + for line in fp: + # strip comments + line = line.split('#')[0].strip() + if ':' not in line: + continue + outputs, inputs = line.split(':', 1) + outputs = outputs.split() + inputs = inputs.split() + for o in outputs: + try: + result[o].extend(inputs) + except KeyError: + result[o] = inputs + return result + +def check_rule(ui, repo, modified, basedir, output, inputs): + """Verify that the output is newer than any of the inputs.
+ Return (status, stamp), where status is True if the update succeeded, + and stamp is the newest time stamp assigned to any file (might be in + the future). + + If basedir is nonempty, it gives a directory in which the tree is to + be checked. + """ + f_output = repo.wjoin(os.path.join(basedir, output)) + try: + o_time = os.stat(f_output).st_mtime + except OSError: + ui.warn("Generated file %s does not exist\n" % output) + return False, 0 + youngest = 0 # youngest dependency + backdate = None + backdate_source = None + for i in inputs: + f_i = repo.wjoin(os.path.join(basedir, i)) + try: + i_time = os.stat(f_i).st_mtime + except OSError: + ui.warn(".hgtouch input file %s does not exist\n" % i) + return False, 0 + if i in modified: + # input is modified. Need to backdate at least to i_time + if backdate is None or backdate > i_time: + backdate = i_time + backdate_source = i + continue + youngest = max(i_time, youngest) + if backdate is not None: + ui.warn("Input %s for file %s locally modified\n" % (backdate_source, output)) + # set to 1s before oldest modified input + backdate -= 1 + os.utime(f_output, (backdate, backdate)) + return False, 0 + if youngest >= o_time: + ui.note("Touching %s\n" % output) + youngest += 1 + os.utime(f_output, (youngest, youngest)) + return True, youngest + else: + # Nothing to update + return True, 0 + +def do_touch(ui, repo, basedir): + if basedir: + if not os.path.isdir(repo.wjoin(basedir)): + ui.warn("Abort: basedir %r does not exist\n" % basedir) + return + modified = [] + else: + modified = repo.status()[0] + dependencies = parse_config(repo) + success = True + tstamp = 0 # newest time stamp assigned + # try processing all rules in topological order + hold_back = {} + while dependencies: + output, inputs = dependencies.popitem() + # check whether any of the inputs is generated + for i in inputs: + if i in dependencies: + hold_back[output] = inputs + continue + _success, _tstamp = check_rule(ui, repo, modified, basedir, output, inputs) + success = success and _success + tstamp = max(tstamp, _tstamp) + # put back held back rules + dependencies.update(hold_back) + hold_back = {} + now = time.time() + if tstamp > now: + # wait until real time has passed the newest time stamp, to + # avoid having files dated in the future + time.sleep(tstamp-now) + if hold_back: + ui.warn("Cyclic dependency involving %s\n" % (' '.join(hold_back.keys()))) + return False + return success + +def touch(ui, repo, basedir): + "touch generated files that are older than their sources after an update." + do_touch(ui, repo, basedir) + +cmdtable = { + "touch": (touch, + [('b', 'basedir', '', 'base dir of the tree to apply touching')], + "hg touch [-b BASEDIR]") } diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index 68544ac..8b92ae1 100644..100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -13,8 +13,8 @@ LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias' def parse(filename): - f = open(filename) - lines = f.read().splitlines() + with open(filename, encoding='latin1') as f: + lines = list(f) data = {} for line in lines: line = line.strip() @@ -23,6 +23,12 @@ if line[:1] == '#': continue locale, alias = line.split() + # Fix non-standard locale names, e.g. ks_IN@devanagari.UTF-8 + if '@' in alias: + alias_lang, _, alias_mod = alias.partition('@') + if '.' in alias_mod: + alias_mod, _, alias_enc = alias_mod.partition('.') + alias = alias_lang + '.' + alias_enc + '@' + alias_mod # Strip ':' if locale[-1] == ':': locale = locale[:-1] @@ -47,15 +53,15 @@ def parse(filename): def pprint(data): items = sorted(data.items()) for k, v in items: - print(' %-40s%r,' % ('%r:' % k, v)) + print(' %-40s%a,' % ('%a:' % k, v)) def print_differences(data, olddata): items = sorted(olddata.items()) for k, v in items: if k not in data: - print('# removed %r' % k) + print('# removed %a' % k) elif olddata[k] != data[k]: - print('# updated %r -> %r to %r' % \ + print('# updated %a -> %a to %a' % \ (k, olddata[k], data[k])) # Additions are not mentioned diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index 7b3a81f..cd90691 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -154,7 +154,7 @@ def make(filename, outfile): # This is a message with plural forms elif l.startswith('msgid_plural'): if section != ID: - print('msgid_plural not preceeded by msgid on %s:%d' % (infile, lno), + print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno), file=sys.stderr) sys.exit(1) l = l[12:] diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 7793bc6..9ffeb17 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -1,6 +1,6 @@ #! /usr/bin/env python3 # -*- coding: iso-8859-1 -*- -# Originally written by Barry Warsaw <barry@zope.com> +# Originally written by Barry Warsaw <barry@python.org> # # Minimally patched to make it even more xgettext compatible # by Peter Funk <pf@artcom-gmbh.de> @@ -163,7 +163,6 @@ import time import getopt import token import tokenize -import operator __version__ = '1.5' diff --git a/Tools/importbench/README b/Tools/importbench/README new file mode 100644 index 0000000..81a5544 --- /dev/null +++ b/Tools/importbench/README @@ -0,0 +1,6 @@ +Importbench is a set of micro-benchmarks for various import scenarios. + +It should not be used as an overall benchmark of import performance, but rather +an easy way to measure impact of possible code changes. For a real-world +benchmark of import, use the normal_startup benchmark from +hg.python.org/benchmarks. diff --git a/Tools/importbench/importbench.py b/Tools/importbench/importbench.py new file mode 100644 index 0000000..714c0e4 --- /dev/null +++ b/Tools/importbench/importbench.py @@ -0,0 +1,252 @@ +"""Benchmark some basic import use-cases. + +The assumption is made that this benchmark is run in a fresh interpreter and +thus has no external changes made to import-related attributes in sys.
+ +""" +from test.test_importlib import util +from test.test_importlib.source import util as source_util +import decimal +import imp +import importlib +import importlib.machinery +import json +import os +import py_compile +import sys +import tabnanny +import timeit + + +def bench(name, cleanup=lambda: None, *, seconds=1, repeat=3): + """Bench the given statement as many times as necessary until total + executions take one second.""" + stmt = "__import__({!r})".format(name) + timer = timeit.Timer(stmt) + for x in range(repeat): + total_time = 0 + count = 0 + while total_time < seconds: + try: + total_time += timer.timeit(1) + finally: + cleanup() + count += 1 + else: + # One execution too far + if total_time > seconds: + count -= 1 + yield count // seconds + +def from_cache(seconds, repeat): + """sys.modules""" + name = '<benchmark import>' + module = imp.new_module(name) + module.__file__ = '<test>' + module.__package__ = '' + with util.uncache(name): + sys.modules[name] = module + for result in bench(name, repeat=repeat, seconds=seconds): + yield result + + +def builtin_mod(seconds, repeat): + """Built-in module""" + name = 'errno' + if name in sys.modules: + del sys.modules[name] + # Relying on built-in importer being implicit. + for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds): + yield result + + +def source_wo_bytecode(seconds, repeat): + """Source w/o bytecode: small""" + sys.dont_write_bytecode = True + try: + name = '__importlib_test_benchmark__' + # Clears out sys.modules and puts an entry at the front of sys.path. + with source_util.create_modules(name) as mapping: + assert not os.path.exists(imp.cache_from_source(mapping[name])) + sys.meta_path.append(importlib.machinery.PathFinder) + loader = (importlib.machinery.SourceFileLoader, + importlib.machinery.SOURCE_SUFFIXES, True) + sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader)) + for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds): + yield result + finally: + sys.dont_write_bytecode = False + + +def _wo_bytecode(module): + name = module.__name__ + def benchmark_wo_bytecode(seconds, repeat): + """Source w/o bytecode: {}""" + bytecode_path = imp.cache_from_source(module.__file__) + if os.path.exists(bytecode_path): + os.unlink(bytecode_path) + sys.dont_write_bytecode = True + try: + for result in bench(name, lambda: sys.modules.pop(name), + repeat=repeat, seconds=seconds): + yield result + finally: + sys.dont_write_bytecode = False + + benchmark_wo_bytecode.__doc__ = benchmark_wo_bytecode.__doc__.format(name) + return benchmark_wo_bytecode + +tabnanny_wo_bytecode = _wo_bytecode(tabnanny) +decimal_wo_bytecode = _wo_bytecode(decimal) + + +def source_writing_bytecode(seconds, repeat): + """Source writing bytecode: small""" + assert not sys.dont_write_bytecode + name = '__importlib_test_benchmark__' + with source_util.create_modules(name) as mapping: + sys.meta_path.append(importlib.machinery.PathFinder) + loader = (importlib.machinery.SourceFileLoader, + importlib.machinery.SOURCE_SUFFIXES, True) + sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader)) + def cleanup(): + sys.modules.pop(name) + os.unlink(imp.cache_from_source(mapping[name])) + for result in bench(name, cleanup, repeat=repeat, seconds=seconds): + assert not os.path.exists(imp.cache_from_source(mapping[name])) + yield result + + +def _writing_bytecode(module): + name = module.__name__ + def writing_bytecode_benchmark(seconds, repeat): + """Source 
writing bytecode: {}""" + assert not sys.dont_write_bytecode + def cleanup(): + sys.modules.pop(name) + os.unlink(imp.cache_from_source(module.__file__)) + for result in bench(name, cleanup, repeat=repeat, seconds=seconds): + yield result + + writing_bytecode_benchmark.__doc__ = ( + writing_bytecode_benchmark.__doc__.format(name)) + return writing_bytecode_benchmark + +tabnanny_writing_bytecode = _writing_bytecode(tabnanny) +decimal_writing_bytecode = _writing_bytecode(decimal) + + +def source_using_bytecode(seconds, repeat): + """Source w/ bytecode: small""" + name = '__importlib_test_benchmark__' + with source_util.create_modules(name) as mapping: + sys.meta_path.append(importlib.machinery.PathFinder) + loader = (importlib.machinery.SourceFileLoader, + importlib.machinery.SOURCE_SUFFIXES, True) + sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader)) + py_compile.compile(mapping[name]) + assert os.path.exists(imp.cache_from_source(mapping[name])) + for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds): + yield result + + +def _using_bytecode(module): + name = module.__name__ + def using_bytecode_benchmark(seconds, repeat): + """Source w/ bytecode: {}""" + py_compile.compile(module.__file__) + for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds): + yield result + + using_bytecode_benchmark.__doc__ = ( + using_bytecode_benchmark.__doc__.format(name)) + return using_bytecode_benchmark + +tabnanny_using_bytecode = _using_bytecode(tabnanny) +decimal_using_bytecode = _using_bytecode(decimal) + + +def main(import_, options): + if options.source_file: + with options.source_file: + prev_results = json.load(options.source_file) + else: + prev_results = {} + __builtins__.__import__ = import_ + benchmarks = (from_cache, builtin_mod, + source_writing_bytecode, + source_wo_bytecode, source_using_bytecode, + tabnanny_writing_bytecode, + tabnanny_wo_bytecode, tabnanny_using_bytecode, + decimal_writing_bytecode, + decimal_wo_bytecode, decimal_using_bytecode, + ) + if options.benchmark: + for b in benchmarks: + if b.__doc__ == options.benchmark: + benchmarks = [b] + break + else: + print('Unknown benchmark: {!r}'.format(options.benchmark, + file=sys.stderr)) + sys.exit(1) + seconds = 1 + seconds_plural = 's' if seconds > 1 else '' + repeat = 3 + header = ('Measuring imports/second over {} second{}, best out of {}\n' + 'Entire benchmark run should take about {} seconds\n' + 'Using {!r} as __import__\n') + print(header.format(seconds, seconds_plural, repeat, + len(benchmarks) * seconds * repeat, __import__)) + new_results = {} + for benchmark in benchmarks: + print(benchmark.__doc__, "[", end=' ') + sys.stdout.flush() + results = [] + for result in benchmark(seconds=seconds, repeat=repeat): + results.append(result) + print(result, end=' ') + sys.stdout.flush() + assert not sys.dont_write_bytecode + print("]", "best is", format(max(results), ',d')) + new_results[benchmark.__doc__] = results + if prev_results: + print('\n\nComparing new vs. old\n') + for benchmark in benchmarks: + benchmark_name = benchmark.__doc__ + old_result = max(prev_results[benchmark_name]) + new_result = max(new_results[benchmark_name]) + result = '{:,d} vs. 
{:,d} ({:%})'.format(new_result, + old_result, + new_result/old_result) + print(benchmark_name, ':', result) + if options.dest_file: + with options.dest_file: + json.dump(new_results, options.dest_file, indent=2) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('-b', '--builtin', dest='builtin', action='store_true', + default=False, help="use the built-in __import__") + parser.add_argument('-r', '--read', dest='source_file', + type=argparse.FileType('r'), + help='file to read benchmark data from to compare ' + 'against') + parser.add_argument('-w', '--write', dest='dest_file', + type=argparse.FileType('w'), + help='file to write benchmark data to') + parser.add_argument('--benchmark', dest='benchmark', + help='specific benchmark to run') + options = parser.parse_args() + import_ = __import__ + if not options.builtin: + import_ = importlib.__import__ + + main(import_, options) diff --git a/Tools/iobench/iobench.py b/Tools/iobench/iobench.py index 5ec6f17..530bc79 100644 --- a/Tools/iobench/iobench.py +++ b/Tools/iobench/iobench.py @@ -1,13 +1,12 @@ # -*- coding: utf-8 -*- # This file should be kept compatible with both Python 2.6 and Python >= 3.0. -import time +import itertools import os +import platform import re import sys -import hashlib -import functools -import itertools +import time from optparse import OptionParser out = sys.stdout @@ -307,6 +306,16 @@ def run_all_tests(options): "large": 2, } + print("Python %s" % sys.version) + if sys.version_info < (3, 3): + if sys.maxunicode > 0xffff: + text = "UCS-4 (wide build)" + else: + text = "UTF-16 (narrow build)" + else: + text = "PEP 393" + print("Unicode: %s" % text) + print(platform.platform()) binary_files = list(get_binary_files()) text_files = list(get_text_files()) if "b" in options: diff --git a/Tools/msi/msi.py b/Tools/msi/msi.py index 508816d..5ed025d 100644 --- a/Tools/msi/msi.py +++ b/Tools/msi/msi.py @@ -2,12 +2,11 @@ # (C) 2003 Martin v. Loewis # See "FOO" in comments refers to MSDN sections with the title FOO. import msilib, schema, sequence, os, glob, time, re, shutil, zipfile +import subprocess, tempfile from msilib import Feature, CAB, Directory, Dialog, Binary, add_data import uisample from win32com.client import constants from distutils.spawn import find_executable -from uuids import product_codes -import tempfile # Settings can be overridden in config.py below # 0 for official python.org releases @@ -28,7 +27,7 @@ have_tcl = True # path to PCbuild directory PCBUILD="PCbuild" # msvcrt version -MSVCR = "90" +MSVCR = "100" # Name of certificate in default store to sign MSI with certname = None # Make a zip file containing the PDB files for this build? 
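
As an aside, the timing pattern used by bench() in the importbench script above can be reproduced standalone. A minimal sketch (the module name 'errno' and the one-second budget are illustrative, not part of the patch):

    import sys, timeit
    timer = timeit.Timer("__import__('errno')")
    sys.modules.pop('errno', None)   # make sure the first import is not cached
    total = count = 0
    while total < 1.0:
        total += timer.timeit(1)     # time a single import
        sys.modules.pop('errno')     # evict it so the next run imports again
        count += 1
    print('%d imports/second' % count)
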
@@ -77,19 +76,16 @@ upgrade_code_64='{6A965A0C-6EE6-4E3A-9983-3263F56311EC}' if snapshot: current_version = "%s.%s.%s" % (major, minor, int(time.time()/3600/24)) - product_code = msilib.gen_uuid() -else: - product_code = product_codes[current_version] if full_current_version is None: full_current_version = current_version extensions = [ - 'bz2.pyd', 'pyexpat.pyd', 'select.pyd', 'unicodedata.pyd', 'winsound.pyd', + '_bz2.pyd', '_elementtree.pyd', '_socket.pyd', '_ssl.pyd', @@ -100,7 +96,10 @@ extensions = [ '_ctypes_test.pyd', '_sqlite3.pyd', '_hashlib.pyd', - '_multiprocessing.pyd' + '_multiprocessing.pyd', + '_lzma.pyd', + '_decimal.pyd', + '_testbuffer.pyd' ] # Well-known component UUIDs @@ -119,12 +118,11 @@ pythondll_uuid = { "30":"{6953bc3b-6768-4291-8410-7914ce6e2ca8}", "31":"{4afcba0b-13e4-47c3-bebe-477428b46913}", "32":"{3ff95315-1096-4d31-bd86-601d5438ad5e}", + "33":"{f7581ca4-d368-4eea-8f82-d48c64c4f047}", } [major+minor] # Compute the name that Sphinx gives to the docfile -docfile = "" -if int(micro): - docfile = micro +docfile = micro if level < 0xf: if level == 0xC: docfile += "rc%s" % (serial,) @@ -185,12 +183,19 @@ dll_path = os.path.join(srcdir, PCBUILD, dll_file) msilib.set_arch_from_file(dll_path) if msilib.pe_type(dll_path) != msilib.pe_type("msisupport.dll"): raise SystemError("msisupport.dll for incorrect architecture") + if msilib.Win64: upgrade_code = upgrade_code_64 - # Bump the last digit of the code by one, so that 32-bit and 64-bit - # releases get separate product codes - digit = hex((int(product_code[-2],16)+1)%16)[-1] - product_code = product_code[:-2] + digit + '}' + +if snapshot: + product_code = msilib.gen_uuid() +else: + # official release: generate UUID from the download link that the file will have + import uuid + product_code = uuid.uuid3(uuid.NAMESPACE_URL, + 'http://www.python.org/ftp/python/%s.%s.%s/python-%s%s.msi' % + (major, minor, micro, full_current_version, msilib.arch_ext)) + product_code = '{%s}' % product_code if testpackage: ext = 'px' @@ -281,7 +286,7 @@ def remove_old_versions(db): None, migrate_features, None, "REMOVEOLDSNAPSHOT")]) props = "REMOVEOLDSNAPSHOT;REMOVEOLDVERSION" - props += ";TARGETDIR;DLLDIR" + props += ";TARGETDIR;DLLDIR;LAUNCHERDIR" # Installer collects the product codes of the earlier releases in # these properties. In order to allow modification of the properties, # they must be declared as secure. See "SecureCustomProperties Property" @@ -410,7 +415,7 @@ def add_ui(db): ("VerdanaRed9", "Verdana", 9, 255, 0), ]) - compileargs = r'-Wi "[TARGETDIR]Lib\compileall.py" -f -x "bad_coding|badsyntax|site-packages|py2_|lib2to3\\tests" "[TARGETDIR]Lib"' + compileargs = r'-Wi "[TARGETDIR]Lib\compileall.py" -f -x "bad_coding|badsyntax|site-packages|py2_|lib2to3\\tests|venv\\scripts" "[TARGETDIR]Lib"' lib2to3args = r'-c "import lib2to3.pygram, lib2to3.patcomp;lib2to3.patcomp.PatternCompiler()"' # See "CustomAction Table" add_data(db, "CustomAction", [ @@ -421,6 +426,8 @@ def add_ui(db): "[WindowsVolume]Python%s%s" % (major, minor)), ("SetDLLDirToTarget", 307, "DLLDIR", "[TARGETDIR]"), ("SetDLLDirToSystem32", 307, "DLLDIR", SystemFolderName), + ("SetLauncherDirToTarget", 307, "LAUNCHERDIR", "[TARGETDIR]"), + ("SetLauncherDirToWindows", 307, "LAUNCHERDIR", "[WindowsFolder]"), # msidbCustomActionTypeExe + msidbCustomActionTypeSourceFile # See "Custom Action Type 18" ("CompilePyc", 18, "python.exe", compileargs), @@ -437,6 +444,8 @@ def add_ui(db): # In the user interface, assume all-users installation if privileged. 
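
The uuid3-based product codes above are deterministic: hashing the same download URL always yields the same GUID, which is why the hand-maintained table in uuids.py (deleted later in this patch) is no longer needed. A small illustration (the URL is only an example):

    import uuid
    url = 'http://www.python.org/ftp/python/3.3.0/python-3.3.0.msi'
    print('{%s}' % uuid.uuid3(uuid.NAMESPACE_URL, url))
    # Prints the same GUID on every run.
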
("SetDLLDirToSystem32", 'DLLDIR="" and ' + sys32cond, 751), ("SetDLLDirToTarget", 'DLLDIR="" and not ' + sys32cond, 752), + ("SetLauncherDirToWindows", 'LAUNCHERDIR="" and ' + sys32cond, 753), + ("SetLauncherDirToTarget", 'LAUNCHERDIR="" and not ' + sys32cond, 754), ("SelectDirectoryDlg", "Not Installed", 1230), # XXX no support for resume installations yet #("ResumeDlg", "Installed AND (RESUME OR Preselected)", 1240), @@ -445,13 +454,20 @@ def add_ui(db): add_data(db, "AdminUISequence", [("InitialTargetDir", 'TARGETDIR=""', 750), ("SetDLLDirToTarget", 'DLLDIR=""', 751), + ("SetLauncherDirToTarget", 'LAUNCHERDIR=""', 752), ]) + # Prepend TARGETDIR to the system path, and remove it on uninstall. + add_data(db, "Environment", + [("PathAddition", "=-*Path", "[TARGETDIR];[~]", "REGISTRY.path")]) + # Execute Sequences add_data(db, "InstallExecuteSequence", [("InitialTargetDir", 'TARGETDIR=""', 750), ("SetDLLDirToSystem32", 'DLLDIR="" and ' + sys32cond, 751), ("SetDLLDirToTarget", 'DLLDIR="" and not ' + sys32cond, 752), + ("SetLauncherDirToWindows", 'LAUNCHERDIR="" and ' + sys32cond, 753), + ("SetLauncherDirToTarget", 'LAUNCHERDIR="" and not ' + sys32cond, 754), ("UpdateEditIDLE", None, 1050), ("CompilePyc", "COMPILEALL", 6800), ("CompilePyo", "COMPILEALL", 6801), @@ -460,6 +476,7 @@ def add_ui(db): add_data(db, "AdminExecuteSequence", [("InitialTargetDir", 'TARGETDIR=""', 750), ("SetDLLDirToTarget", 'DLLDIR=""', 751), + ("SetLauncherDirToTarget", 'LAUNCHERDIR=""', 752), ("CompilePyc", "COMPILEALL", 6800), ("CompilePyo", "COMPILEALL", 6801), ("CompileGrammar", "COMPILEALL", 6802), @@ -670,11 +687,11 @@ def add_ui(db): c=features.xbutton("Advanced", "Advanced", None, 0.30) c.event("SpawnDialog", "AdvancedDlg") - c=features.text("ItemDescription", 140, 180, 210, 30, 3, + c=features.text("ItemDescription", 140, 180, 210, 40, 3, "Multiline description of the currently selected item.") c.mapping("SelectionDescription","Text") - c=features.text("ItemSize", 140, 210, 210, 45, 3, + c=features.text("ItemSize", 140, 225, 210, 33, 3, "The size of the currently selected item.") c.mapping("SelectionSize", "Text") @@ -828,7 +845,7 @@ def add_features(db): # (i.e. additional Python libraries) need to follow the parent feature. # Features that have no advertisement trigger (e.g. the test suite) # must not support advertisement - global default_feature, tcltk, htmlfiles, tools, testsuite, ext_feature, private_crt + global default_feature, tcltk, htmlfiles, tools, testsuite, ext_feature, private_crt, prepend_path default_feature = Feature(db, "DefaultFeature", "Python", "Python Interpreter and Libraries", 1, directory = "TARGETDIR") @@ -848,32 +865,38 @@ def add_features(db): htmlfiles = Feature(db, "Documentation", "Documentation", "Python HTMLHelp File", 7, parent = default_feature) tools = Feature(db, "Tools", "Utility Scripts", - "Python utility scripts (Tools/", 9, + "Python utility scripts (Tools/)", 9, parent = default_feature, attributes=2) testsuite = Feature(db, "Testsuite", "Test suite", "Python test suite (Lib/test/)", 11, parent = default_feature, attributes=2|8) - -def extract_msvcr90(): + # prepend_path is an additional feature which is to be off by default. + # Since the default level for the above features is 1, this needs to be + # at least level higher. + prepend_path = Feature(db, "PrependPath", "Add python.exe to Path", + "Prepend [TARGETDIR] to the system Path variable. 
" + "This allows you to type 'python' into a command " + "prompt without needing the full path.", 13, + parent = default_feature, attributes=2|8, + level=2) + +def extract_msvcr100(): # Find the redistributable files if msilib.Win64: - arch = "amd64" + arch = "x64" else: arch = "x86" - dir = os.path.join(os.environ['VS90COMNTOOLS'], r"..\..\VC\redist\%s\Microsoft.VC90.CRT" % arch) + dir = os.path.join(os.environ['VS100COMNTOOLS'], r"..\..\VC\redist\%s\Microsoft.VC100.CRT" % arch) result = [] installer = msilib.MakeInstaller() - # omit msvcm90 and msvcp90, as they aren't really needed - files = ["Microsoft.VC90.CRT.manifest", "msvcr90.dll"] - for f in files: - path = os.path.join(dir, f) - kw = {'src':path} - if f.endswith('.dll'): - kw['version'] = installer.FileVersion(path, 0) - kw['language'] = installer.FileVersion(path, 1) - result.append((f, kw)) - return result + # At least for VS2010, manifests are no longer provided + name = "msvcr100.dll" + path = os.path.join(dir, name) + kw = {'src':path} + kw['version'] = installer.FileVersion(path, 0) + kw['language'] = installer.FileVersion(path, 1) + return name, kw def generate_license(): import shutil, glob @@ -889,7 +912,7 @@ def generate_license(): dirs = glob.glob(srcdir+"/../"+pat) if not dirs: raise ValueError, "Could not find "+srcdir+"/../"+pat - if len(dirs) > 2: + if len(dirs) > 2 and not snapshot: raise ValueError, "Multiple copies of "+pat dir = dirs[0] shutil.copyfileobj(open(os.path.join(dir, file)), out) @@ -904,16 +927,28 @@ class PyDirectory(Directory): kw['componentflags'] = 2 #msidbComponentAttributesOptional Directory.__init__(self, *args, **kw) - def check_unpackaged(self): - self.unpackaged_files.discard('__pycache__') - self.unpackaged_files.discard('.svn') - if self.unpackaged_files: - print "Warning: Unpackaged files in %s" % self.absolute - print self.unpackaged_files +def hgmanifest(): + # Fetch file list from Mercurial + process = subprocess.Popen(['hg', 'manifest'], stdout=subprocess.PIPE) + stdout, stderr = process.communicate() + # Create nested directories for file tree + result = {} + for line in stdout.splitlines(): + components = line.split('/') + d = result + while len(components) > 1: + d1 = d.setdefault(components[0], {}) + d = d1 + del components[0] + d[components[0]] = None + return result + # See "File Table", "Component Table", "Directory Table", # "FeatureComponents Table" def add_files(db): + installer = msilib.MakeInstaller() + hgfiles = hgmanifest() cab = CAB("python") tmpfiles = [] # Add all executables, icons, text files into the TARGETDIR component @@ -932,11 +967,32 @@ def add_files(db): # msidbComponentAttributesSharedDllRefCount = 8, see "Component Table" dlldir = PyDirectory(db, cab, root, srcdir, "DLLDIR", ".") + launcherdir = PyDirectory(db, cab, root, srcdir, "LAUNCHERDIR", ".") + + # msidbComponentAttributes64bit = 256; this disables registry redirection + # to allow setting the SharedDLLs key in the 64-bit portion even for a + # 32-bit installer. + # XXX does this still allow to install the component on a 32-bit system? 
+ # Pick up 32-bit binary always + launchersrc = PCBUILD + if launchersrc.lower() == 'pcbuild\\x64-pgo': + launchersrc = 'PCBuild\\win32-pgo' + if launchersrc.lower() == 'pcbuild\\amd64': + launchersrc = 'PCBuild' + launcher = os.path.join(srcdir, launchersrc, "py.exe") + launcherdir.start_component("launcher", flags = 8+256, keyfile="py.exe") + launcherdir.add_file(launcher, + version=installer.FileVersion(launcher, 0), + language=installer.FileVersion(launcher, 1)) + launcherw = os.path.join(srcdir, launchersrc, "pyw.exe") + launcherdir.start_component("launcherw", flags = 8+256, keyfile="pyw.exe") + launcherdir.add_file(launcherw, + version=installer.FileVersion(launcherw, 0), + language=installer.FileVersion(launcherw, 1)) pydll = "python%s%s.dll" % (major, minor) pydllsrc = os.path.join(srcdir, PCBUILD, pydll) dlldir.start_component("DLLDIR", flags = 8, keyfile = pydll, uuid = pythondll_uuid) - installer = msilib.MakeInstaller() pyversion = installer.FileVersion(pydllsrc, 0) if not snapshot: # For releases, the Python DLL has the same version as the @@ -952,9 +1008,8 @@ def add_files(db): # pointing to the root directory root.start_component("msvcr90", feature=private_crt) # Results are ID,keyword pairs - manifest, crtdll = extract_msvcr90() - root.add_file(manifest[0], **manifest[1]) - root.add_file(crtdll[0], **crtdll[1]) + crtdll, kwds = extract_msvcr100() + root.add_file(crtdll, **kwds) # Copy the manifest # Actually, don't do that anymore - no DLL in DLLs should have a manifest # dependency on msvcr90.dll anymore, so this should not be necessary @@ -975,104 +1030,40 @@ def add_files(db): # Add all .py files in Lib, except tkinter, test dirs = [] - pydirs = [(root,"Lib")] + pydirs = [(root, "Lib", hgfiles["Lib"], default_feature)] while pydirs: # Commit every now and then, or else installer will complain db.Commit() - parent, dir = pydirs.pop() - if dir == ".svn" or dir == '__pycache__' or dir.startswith("plat-"): + parent, dir, files, feature = pydirs.pop() + if dir.startswith("plat-"): continue - elif dir in ["tkinter", "idlelib", "Icons"]: + if dir in ["tkinter", "idlelib", "turtledemo"]: if not have_tcl: continue + feature = tcltk tcltk.set_current() - elif dir in ['test', 'tests', 'data', 'output']: - # test: Lib, Lib/email, Lib/ctypes, Lib/sqlite3 - # tests: Lib/distutils - # data: Lib/email/test - # output: Lib/test - testsuite.set_current() + elif dir in ('test', 'tests'): + feature = testsuite elif not have_ctypes and dir == "ctypes": continue - else: - default_feature.set_current() + feature.set_current() lib = PyDirectory(db, cab, parent, dir, dir, "%s|%s" % (parent.make_short(dir), dir)) - # Add additional files dirs.append(lib) - lib.glob("*.txt") - if dir=='site-packages': - lib.add_file("README.txt", src="README") - continue - files = lib.glob("*.py") - files += lib.glob("*.pyw") - if files: - # Add an entry to the RemoveFile table to remove bytecode files. 
- lib.remove_pyc() - # package READMEs if present - lib.glob("README") - if dir=='Lib': - lib.add_file('wsgiref.egg-info') - if dir=='test' and parent.physical=='Lib': - lib.add_file("185test.db") - lib.add_file("audiotest.au") - lib.add_file("sgml_input.html") - lib.add_file("testtar.tar") - lib.add_file("test_difflib_expect.html") - lib.add_file("check_soundcard.vbs") - lib.add_file("empty.vbs") - lib.add_file("Sine-1000Hz-300ms.aif") - lib.add_file("mime.types") - lib.glob("*.uue") - lib.glob("*.pem") - lib.glob("*.pck") - lib.glob("cfgparser.*") - lib.add_file("zip_cp437_header.zip") - lib.add_file("zipdir.zip") - if dir=='capath': - lib.glob("*.0") - if dir=='tests' and parent.physical=='distutils': - lib.add_file("Setup.sample") - if dir=='decimaltestdata': - lib.glob("*.decTest") - if dir=='xmltestdata': - lib.glob("*.xml") - lib.add_file("test.xml.out") - if dir=='output': - lib.glob("test_*") - if dir=='sndhdrdata': - lib.glob("sndhdr.*") - if dir=='idlelib': - lib.glob("*.def") - lib.add_file("idle.bat") - lib.add_file("ChangeLog") - if dir=="Icons": - lib.glob("*.gif") - lib.add_file("idle.icns") - if dir=="command" and parent.physical=="distutils": - lib.glob("wininst*.exe") - lib.add_file("command_template") - if dir=="lib2to3": - lib.removefile("pickle", "*.pickle") - if dir=="macholib": - lib.add_file("README.ctypes") - lib.glob("fetch_macholib*") - if dir=='turtledemo': - lib.add_file("turtle.cfg") - if dir=="pydoc_data": - lib.add_file("_pydoc.css") - if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email": - # This should contain all non-.svn files listed in subversion - for f in os.listdir(lib.absolute): - if f.endswith(".txt") or f==".svn":continue - if f.endswith(".au") or f.endswith(".gif"): - lib.add_file(f) + has_py = False + for name, subdir in files.items(): + if subdir is None: + assert os.path.isfile(os.path.join(lib.absolute, name)) + if name == 'README': + lib.add_file("README.txt", src="README") else: - print("WARNING: New file %s in email/test/data" % f) - for f in os.listdir(lib.absolute): - if os.path.isdir(os.path.join(lib.absolute, f)): - pydirs.append((lib, f)) - for d in dirs: - d.check_unpackaged() + lib.add_file(name) + has_py = has_py or name.endswith(".py") or name.endswith(".pyw") + else: + assert os.path.isdir(os.path.join(lib.absolute, name)) + pydirs.append((lib, name, subdir, feature)) + + if has_py: + lib.remove_pyc() # Add DLLs default_feature.set_current() lib = DLLs @@ -1159,6 +1150,8 @@ def add_files(db): lib.add_file("README.txt", src="README") if f == 'Scripts': lib.add_file("2to3.py", src="2to3") + lib.add_file("pydoc3.py", src="pydoc3") + lib.add_file("pyvenv.py", src="pyvenv") if have_tcl: lib.start_component("pydocgui.pyw", tcltk, keyfile="pydocgui.pyw") lib.add_file("pydocgui.pyw") @@ -1190,6 +1183,8 @@ def add_registry(db): "InstallPath"), ("REGISTRY.doc", msilib.gen_uuid(), "TARGETDIR", registry_component, None, "Documentation"), + ("REGISTRY.path", msilib.gen_uuid(), "TARGETDIR", registry_component, None, + None), ("REGISTRY.def", msilib.gen_uuid(), "TARGETDIR", registry_component, None, None)] + tcldata) # See "FeatureComponents Table". 
@@ -1206,6 +1201,7 @@ def add_registry(db): add_data(db, "FeatureComponents", [(default_feature.id, "REGISTRY"), (htmlfiles.id, "REGISTRY.doc"), + (prepend_path.id, "REGISTRY.path"), (ext_feature.id, "REGISTRY.def")] + tcldata ) @@ -1244,11 +1240,11 @@ def add_registry(db): "text/plain", "REGISTRY.def"), #Verbs ("py.open", -1, pat % (testprefix, "", "open"), "", - r'"[TARGETDIR]python.exe" "%1" %*', "REGISTRY.def"), + r'"[LAUNCHERDIR]py.exe" "%1" %*', "REGISTRY.def"), ("pyw.open", -1, pat % (testprefix, "NoCon", "open"), "", - r'"[TARGETDIR]pythonw.exe" "%1" %*', "REGISTRY.def"), + r'"[LAUNCHERDIR]pyw.exe" "%1" %*', "REGISTRY.def"), ("pyc.open", -1, pat % (testprefix, "Compiled", "open"), "", - r'"[TARGETDIR]python.exe" "%1" %*', "REGISTRY.def"), + r'"[LAUNCHERDIR]py.exe" "%1" %*', "REGISTRY.def"), ] + tcl_verbs + [ #Icons ("py.icon", -1, pat2 % (testprefix, ""), "", @@ -1347,9 +1343,9 @@ finally: # Merge CRT into MSI file. This requires the database to be closed. mod_dir = os.path.join(os.environ["ProgramFiles"], "Common Files", "Merge Modules") if msilib.Win64: - modules = ["Microsoft_VC90_CRT_x86_x64.msm", "policy_9_0_Microsoft_VC90_CRT_x86_x64.msm"] + modules = ["Microsoft_VC100_CRT_x64.msm"] else: - modules = ["Microsoft_VC90_CRT_x86.msm","policy_9_0_Microsoft_VC90_CRT_x86.msm"] + modules = ["Microsoft_VC100_CRT_x86.msm"] for i, n in enumerate(modules): modules[i] = os.path.join(mod_dir, n) @@ -1414,7 +1410,10 @@ merge(msiname, "SharedCRT", "TARGETDIR", modules) # certname (from config.py) should be (a substring of) # the certificate subject, e.g. "Python Software Foundation" if certname: - os.system('signtool sign /n "%s" /t http://timestamp.verisign.com/scripts/timestamp.dll %s' % (certname, msiname)) + os.system('signtool sign /n "%s" ' + '/t http://timestamp.verisign.com/scripts/timestamp.dll ' + '/d "Python %s" ' + '%s' % (certname, full_current_version, msiname)) if pdbzip: build_pdbzip() diff --git a/Tools/msi/msilib.py b/Tools/msi/msilib.py index 92fb5e1..c208b91 100644 --- a/Tools/msi/msilib.py +++ b/Tools/msi/msilib.py @@ -305,7 +305,7 @@ def init_database(name, schema, t.create(db) # Fill the validation table add_data(db, "_Validation", schema._Validation_records) - # Initialize the summary information, allowing atmost 20 properties + # Initialize the summary information, allowing at most 20 properties si = db.GetSummaryInformation(20) si.SetProperty(PID_TITLE, "Installation Database") si.SetProperty(PID_SUBJECT, ProductName) @@ -408,7 +408,7 @@ class Directory: self.physical = physical self.logical = logical self.component = None - self.short_names = sets.Set() + self.short_names = {} self.ids = sets.Set() self.keyfiles = {} self.componentflags = componentflags @@ -456,23 +456,25 @@ class Directory: [(feature.id, component)]) def make_short(self, file): + long = file file = re.sub(r'[\?|><:/*"+,;=\[\]]', '_', file) # restrictions on short names - parts = file.split(".") + parts = file.split(".", 1) if len(parts)>1: - suffix = parts[-1].upper() + suffix = parts[1].upper() else: - suffix = None + suffix = '' prefix = parts[0].upper() - if len(prefix) <= 8 and (not suffix or len(suffix)<=3): + if len(prefix) <= 8 and '.' 
not in suffix and len(suffix) <= 3: if suffix: file = prefix+"."+suffix else: file = prefix - assert file not in self.short_names + assert file not in self.short_names, (file, self.short_names[file]) else: prefix = prefix[:6] if suffix: - suffix = suffix[:3] + # last three characters of last suffix + suffix = suffix.rsplit('.')[-1][:3] pos = 1 while 1: if suffix: @@ -484,7 +486,7 @@ class Directory: assert pos < 10000 if pos in (10, 100, 1000): prefix = prefix[:-1] - self.short_names.add(file) + self.short_names[file] = long return file def add_file(self, file, src=None, version=None, language=None): diff --git a/Tools/msi/schema.py b/Tools/msi/schema.py index d028a11..1f72e5a 100644 --- a/Tools/msi/schema.py +++ b/Tools/msi/schema.py @@ -958,7 +958,7 @@ _Validation_records = [ (u'ServiceInstall',u'StartType',u'N',0,4,None, None, None, None, u'Type of the service',), (u'Shortcut',u'Name',u'N',None, None, None, None, u'Filename',None, u'The name of the shortcut to be created.',), (u'Shortcut',u'Description',u'Y',None, None, None, None, u'Text',None, u'The description for the shortcut.',), -(u'Shortcut',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table denoting the component whose selection gates the the shortcut creation/deletion.',), +(u'Shortcut',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table denoting the component whose selection gates the shortcut creation/deletion.',), (u'Shortcut',u'Icon_',u'Y',None, None, u'Icon',1,u'Identifier',None, u'Foreign key into the File table denoting the external icon file for the shortcut.',), (u'Shortcut',u'IconIndex',u'Y',-32767,32767,None, None, None, None, u'The icon index for the shortcut.',), (u'Shortcut',u'Directory_',u'N',None, None, u'Directory',1,u'Identifier',None, u'Foreign key into the Directory table denoting the directory where the shortcut file is created.',), diff --git a/Tools/msi/uisample.py b/Tools/msi/uisample.py index 2bdf59e..5430805 100644 --- a/Tools/msi/uisample.py +++ b/Tools/msi/uisample.py @@ -1195,7 +1195,7 @@ _Validation = [ (u'ServiceInstall', u'StartType', u'N', 0, 4, None, None, None, None, u'Type of the service'), (u'Shortcut', u'Name', u'N', None, None, None, None, u'Filename', None, u'The name of the shortcut to be created.'), (u'Shortcut', u'Description', u'Y', None, None, None, None, u'Text', None, u'The description for the shortcut.'), -(u'Shortcut', u'Component_', u'N', None, None, u'Component', 1, u'Identifier', None, u'Foreign key into the Component table denoting the component whose selection gates the the shortcut creation/deletion.'), +(u'Shortcut', u'Component_', u'N', None, None, u'Component', 1, u'Identifier', None, u'Foreign key into the Component table denoting the component whose selection gates the shortcut creation/deletion.'), (u'Shortcut', u'Icon_', u'Y', None, None, u'Icon', 1, u'Identifier', None, u'Foreign key into the File table denoting the external icon file for the shortcut.'), (u'Shortcut', u'IconIndex', u'Y', -32767, 32767, None, None, None, None, u'The icon index for the shortcut.'), (u'Shortcut', u'Directory_', u'N', None, None, u'Directory', 1, u'Identifier', None, u'Foreign key into the Directory table denoting the directory where the shortcut file is created.'), diff --git a/Tools/msi/uuids.py b/Tools/msi/uuids.py deleted file mode 100644 index 90de2ff..0000000 --- a/Tools/msi/uuids.py +++ /dev/null @@ -1,101 +0,0 @@ -# This should be extended for each Python release. 
-# The product code must change whenever the name of the MSI file -# changes, and when new component codes are issued for existing -# components. See "Changing the Product Code". As we change the -# component codes with every build, we need a new product code -# each time. For intermediate (snapshot) releases, they are automatically -# generated. For official releases, we record the product codes, -# so people can refer to them. -product_codes = { - '2.5.101': '{bc14ce3e-5e72-4a64-ac1f-bf59a571898c}', # 2.5a1 - '2.5.102': '{5eed51c1-8e9d-4071-94c5-b40de5d49ba5}', # 2.5a2 - '2.5.103': '{73dcd966-ffec-415f-bb39-8342c1f47017}', # 2.5a3 - '2.5.111': '{c797ecf8-a8e6-4fec-bb99-526b65f28626}', # 2.5b1 - '2.5.112': '{32beb774-f625-439d-b587-7187487baf15}', # 2.5b2 - '2.5.113': '{89f23918-11cf-4f08-be13-b9b2e6463fd9}', # 2.5b3 - '2.5.121': '{8e9321bc-6b24-48a3-8fd4-c95f8e531e5f}', # 2.5c1 - '2.5.122': '{a6cd508d-9599-45da-a441-cbffa9f7e070}', # 2.5c2 - '2.5.150': '{0a2c5854-557e-48c8-835a-3b9f074bdcaa}', # 2.5.0 - '2.5.1121':'{0378b43e-6184-4c2f-be1a-4a367781cd54}', # 2.5.1c1 - '2.5.1150':'{31800004-6386-4999-a519-518f2d78d8f0}', # 2.5.1 - '2.5.2150':'{6304a7da-1132-4e91-a343-a296269eab8a}', # 2.5.2c1 - '2.5.2150':'{6b976adf-8ae8-434e-b282-a06c7f624d2f}', # 2.5.2 - '2.6.101': '{0ba82e1b-52fd-4e03-8610-a6c76238e8a8}', # 2.6a1 - '2.6.102': '{3b27e16c-56db-4570-a2d3-e9a26180c60b}', # 2.6a2 - '2.6.103': '{cd06a9c5-bde5-4bd7-9874-48933997122a}', # 2.6a3 - '2.6.104': '{dc6ed634-474a-4a50-a547-8de4b7491e53}', # 2.6a4 - '2.6.111': '{3f82079a-5bee-4c4a-8a41-8292389e24ae}', # 2.6b1 - '2.6.112': '{8a0e5970-f3e6-4737-9a2b-bc5ff0f15fb5}', # 2.6b2 - '2.6.113': '{df4f5c21-6fcc-4540-95de-85feba634e76}', # 2.6b3 - '2.6.121': '{bbd34464-ddeb-4028-99e5-f16c4a8fbdb3}', # 2.6c1 - '2.6.122': '{8f64787e-a023-4c60-bfee-25d3a3f592c6}', # 2.6c2 - '2.6.150': '{110eb5c4-e995-4cfb-ab80-a5f315bea9e8}', # 2.6.0 - '2.6.1150':'{9cc89170-000b-457d-91f1-53691f85b223}', # 2.6.1 - '2.6.2121':'{adac412b-b209-4c15-b6ab-dca1b6e47144}', # 2.6.2c1 - '2.6.2150':'{24aab420-4e30-4496-9739-3e216f3de6ae}', # 2.6.2 - '2.6.3121':'{a73e0254-dcda-4fe4-bf37-c7e1c4f4ebb6}', # 2.6.3c1 - '2.6.3150':'{3d9ac095-e115-4e94-bdef-7f7edf17697d}', # 2.6.3 - '2.6.4121':'{727de605-0359-4606-a94b-c2033652379b}', # 2.6.4c1 - '2.6.4122':'{4f7603c6-6352-4299-a398-150a31b19acc}', # 2.6.4c2 - '2.6.4150':'{e7394a0f-3f80-45b1-87fc-abcd51893246}', # 2.6.4 - '2.6.5121':'{e0e273d7-7598-4701-8325-c90c069fd5ff}', # 2.6.5c1 - '2.6.5122':'{fa227b76-0671-4dc6-b826-c2ff2a70dfd5}', # 2.6.5c2 - '2.6.5150':'{4723f199-fa64-4233-8e6e-9fccc95a18ee}', # 2.6.5 - '2.7.101': '{eca1bbef-432c-49ae-a667-c213cc7bbf22}', # 2.7a1 - '2.7.102': '{21ce16ed-73c4-460d-9b11-522f417b2090}', # 2.7a2 - '2.7.103': '{6e7dbd55-ba4a-48ac-a688-6c75db4d7500}', # 2.7a3 - '2.7.104': '{ee774ba3-74a5-48d9-b425-b35a287260c8}', # 2.7a4 - '2.7.111': '{9cfd9ec7-a9c7-4980-a1c6-054fc6493eb3}', # 2.7b1 - '2.7.112': '{9a72faf6-c304-4165-8595-9291ff30cac6}', # 2.7b2 - '2.7.121': '{f530c94a-dd53-4de9-948e-b632b9cb48d2}', # 2.7c1 - '2.7.122': '{f80905d2-dd8d-4b8e-8a40-c23c93dca07d}', # 2.7c2 - '2.7.150': '{20c31435-2a0a-4580-be8b-ac06fc243ca4}', # 2.7.0 - '3.0.101': '{8554263a-3242-4857-9359-aa87bc2c58c2}', # 3.0a1 - '3.0.102': '{692d6e2c-f0ac-40b8-a133-7191aeeb67f9}', # 3.0a2 - '3.0.103': '{49cb2995-751a-4753-be7a-d0b1bb585e06}', # 3.0a3 - '3.0.104': '{87cb019e-19fd-4238-b1c7-85751437d646}', # 3.0a4 - '3.0.105': '{cf2659af-19ec-43d2-8c35-0f6a09439d42}', # 3.0a5 - '3.0.111': '{36c26f55-837d-45cf-848c-5f5c0fb47a28}', # 3.0b1 
- '3.0.112': '{056a0fbc-c8fe-4c61-aade-c4411b70c998}', # 3.0b2 - '3.0.113': '{2b2e89a9-83af-43f9-b7d5-96e80c5a3f26}', # 3.0b3 - '3.0.114': '{e95c31af-69be-4dd7-96e6-e5fc85e660e6}', # 3.0b4 - '3.0.121': '{d0979c5e-cd3c-42ec-be4c-e294da793573}', # 3.0c1 - '3.0.122': '{f707b8e9-a257-4045-818e-4923fc20fbb6}', # 3.0c2 - '3.0.123': '{5e7208f1-8643-4ea2-ab5e-4644887112e3}', # 3.0c3 - '3.0.150': '{e0e56e21-55de-4f77-a109-1baa72348743}', # 3.0.0 - '3.0.1121':'{d35b1ea5-3d70-4872-bf7e-cd066a77a9c9}', # 3.0.1c1 - '3.0.1150':'{de2f2d9c-53e2-40ee-8209-74da63cb060e}', # 3.0.1 - '3.0.2121':'{cef79e7f-9809-49e2-afd2-e24148d7c855}', # 3.0.2c1 - '3.0.2150':'{0cf3b95a-8382-4607-9779-c36407ff362c}', # 3.0.2 - '3.1.101': '{c423eada-c498-4d51-9eb4-bfeae647e0a0}', # 3.1a1 - '3.1.102': '{f6e199bf-dc64-42f3-87d4-1525991a013e}', # 3.1a2 - '3.1.111': '{c3c82893-69b2-4676-8554-1b6ee6c191e9}', # 3.1b1 - '3.1.121': '{da2b5170-12f3-4d99-8a1f-54926cca7acd}', # 3.1c1 - '3.1.122': '{bceb5133-e2ee-4109-951f-ac7e941a1692}', # 3.1c2 - '3.1.150': '{3ad61ee5-81d2-4d7e-adef-da1dd37277d1}', # 3.1.0 - '3.1.1121':'{5782f957-6d49-41d4-bad0-668715dfd638}', # 3.1.1c1 - '3.1.1150':'{7ff90460-89b7-435b-b583-b37b2815ccc7}', # 3.1.1 - '3.1.2121':'{ec45624a-378c-43be-91f3-3f7a59b0d90c}', # 3.1.2c1 - '3.1.2150':'{d40af016-506c-43fb-a738-bd54fa8c1e85}', # 3.1.2 - '3.2.101' :'{b411f168-7a36-4fff-902c-a554d1c78a4f}', # 3.2a1 - '3.2.102' :'{79ff73b7-8359-410f-b9c5-152d2026f8c8}', # 3.2a2 - '3.2.103' :'{e7635c65-c221-4b9b-b70a-5611b8369d77}', # 3.2a3 - '3.2.104' :'{748cd139-75b8-4ca8-98a7-58262298181e}', # 3.2a4 - '3.2.111' :'{20bfc16f-c7cd-4fc0-8f96-9914614a3c50}', # 3.2b1 - '3.2.112' :'{0e350c98-8d73-4993-b686-cfe87160046e}', # 3.2b2 - '3.2.121' :'{2094968d-7583-47f6-a7fd-22304532e09f}', # 3.2rc1 - '3.2.122' :'{4f3edfa6-cf70-469a-825f-e1206aa7f412}', # 3.2rc2 - '3.2.123' :'{90c673d7-8cfd-4969-9816-f7d70bad87f3}', # 3.2rc3 - '3.2.150' :'{b2042d5e-986d-44ec-aee3-afe4108ccc93}', # 3.2.0 - '3.2.1121':'{4f90de4a-83dd-4443-b625-ca130ff361dd}', # 3.2.1rc1 - '3.2.1122':'{dc5eb04d-ff8a-4bed-8f96-23942fd59e5f}', # 3.2.1rc2 - '3.2.1150':'{34b2530c-6349-4292-9dc3-60bda4aed93c}', # 3.2.1 - '3.2.2121':'{DFB29A53-ACC4-44e6-85A6-D0DA26FE8E4E}', # 3.2.2rc1 - '3.2.2150':'{4CDE3168-D060-4b7c-BC74-4D8F9BB01AFD}', # 3.2.2 - '3.2.3121':'{B8E8CFF7-E4C6-4a7c-9F06-BB3A8B75DDA8}', # 3.2.3rc1 - '3.2.3122':'{E8DCD3E0-12B6-4fb7-9DB5-543C2E67372E}', # 3.2.3rc2 - '3.2.3150':'{789C9644-9F82-44d3-B4CA-AC31F46F5882}', # 3.2.3 - '3.2.4121':'{F66CC280-12B0-487a-ADCF-CC049B0C5710}', # 3.2.4rc1 - '3.2.4150':'{871512A4-EB98-4c50-9E55-9DA530A6B09A}', # 3.2.4 - -} diff --git a/Tools/parser/test_unparse.py b/Tools/parser/test_unparse.py index 2ac1ea6..be84400 100644 --- a/Tools/parser/test_unparse.py +++ b/Tools/parser/test_unparse.py @@ -2,9 +2,10 @@ import unittest import test.support import io import os +import random import tokenize -import ast import unparse +import ast def read_pyfile(filename): """Read and return the contents of a Python source file (as a @@ -93,6 +94,21 @@ finally: suite5 """ +with_simple = """\ +with f(): + suite1 +""" + +with_as = """\ +with f() as x: + suite1 +""" + +with_two_items = """\ +with f() as x, g() as y: + suite1 +""" + class ASTTestCase(unittest.TestCase): def assertASTEqual(self, ast1, ast2): self.assertEqual(ast.dump(ast1), ast.dump(ast2)) @@ -215,6 +231,15 @@ class UnparseTestCase(ASTTestCase): self.check_roundtrip("a, *b[0], c = seq") self.check_roundtrip("a, *(b, c) = seq") + def test_with_simple(self): + self.check_roundtrip(with_simple) + 
+ def test_with_as(self): + self.check_roundtrip(with_as) + + def test_with_two_items(self): + self.check_roundtrip(with_two_items) + class DirectoryTestCase(ASTTestCase): """Test roundtrip behaviour on all files in Lib and Lib/test.""" @@ -233,6 +258,10 @@ class DirectoryTestCase(ASTTestCase): if n.endswith('.py') and not n.startswith('bad'): names.append(os.path.join(test_dir, n)) + # Test limited subset of files unless the 'cpu' resource is specified. + if not test.support.is_resource_enabled("cpu"): + names = random.sample(names, 10) + for filename in names: if test.support.verbose: print('Testing %s' % filename) diff --git a/Tools/parser/unparse.py b/Tools/parser/unparse.py index d9fca97..b55e2c6 100644 --- a/Tools/parser/unparse.py +++ b/Tools/parser/unparse.py @@ -1,6 +1,5 @@ "Usage: unparse.py <path to source file>" import sys -import math import ast import tokenize import io @@ -147,6 +146,14 @@ class Unparser: self.dispatch(t.value) self.write(")") + def _YieldFrom(self, t): + self.write("(") + self.write("yield from") + if t.value: + self.write(" ") + self.dispatch(t.value) + self.write(")") + def _Raise(self, t): self.fill("raise") if not t.exc: @@ -158,12 +165,11 @@ class Unparser: self.write(" from ") self.dispatch(t.cause) - def _TryExcept(self, t): + def _Try(self, t): self.fill("try") self.enter() self.dispatch(t.body) self.leave() - for ex in t.handlers: self.dispatch(ex) if t.orelse: @@ -171,22 +177,12 @@ class Unparser: self.enter() self.dispatch(t.orelse) self.leave() - - def _TryFinally(self, t): - if len(t.body) == 1 and isinstance(t.body[0], ast.TryExcept): - # try-except-finally - self.dispatch(t.body) - else: - self.fill("try") + if t.finalbody: + self.fill("finally") self.enter() - self.dispatch(t.body) + self.dispatch(t.finalbody) self.leave() - self.fill("finally") - self.enter() - self.dispatch(t.finalbody) - self.leave() - def _ExceptHandler(self, t): self.fill("except") if t.type: @@ -296,10 +292,7 @@ class Unparser: def _With(self, t): self.fill("with ") - self.dispatch(t.context_expr) - if t.optional_vars: - self.write(" as ") - self.dispatch(t.optional_vars) + interleave(lambda: self.write(", "), self.dispatch, t.items) self.enter() self.dispatch(t.body) self.leave() @@ -564,6 +557,12 @@ class Unparser: if t.asname: self.write(" as "+t.asname) + def _withitem(self, t): + self.dispatch(t.context_expr) + if t.optional_vars: + self.write(" as ") + self.dispatch(t.optional_vars) + def roundtrip(filename, output=sys.stdout): with open(filename, "rb") as pyfile: encoding = tokenize.detect_encoding(pyfile.readline)[0] diff --git a/Tools/pybench/CommandLine.py b/Tools/pybench/CommandLine.py index 715bc49..073cca0 100644 --- a/Tools/pybench/CommandLine.py +++ b/Tools/pybench/CommandLine.py @@ -462,7 +462,7 @@ class Application: handler = getattr(self, handlername) except AttributeError: if value == '': - # count the number of occurances + # count the number of occurrences if optionname in values: values[optionname] = values[optionname] + 1 else: diff --git a/Tools/pybench/Setup.py b/Tools/pybench/Setup.py index 21e654a..21e654a 100644..100755 --- a/Tools/pybench/Setup.py +++ b/Tools/pybench/Setup.py diff --git a/Tools/pybench/clockres.py b/Tools/pybench/clockres.py index d7f1ac8..d7f1ac8 100644..100755 --- a/Tools/pybench/clockres.py +++ b/Tools/pybench/clockres.py diff --git a/Tools/pybench/pybench.py b/Tools/pybench/pybench.py index 8eaad63..942f56d 100755 --- a/Tools/pybench/pybench.py +++ b/Tools/pybench/pybench.py @@ -35,7 +35,9 @@ NEGLIGENCE OR OTHER 
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE ! """ -import sys, time, operator, platform +import sys +import time +import platform from CommandLine import * try: @@ -73,11 +75,15 @@ ALLOW_SKIPPING_CALIBRATION = 1 # Timer types TIMER_TIME_TIME = 'time.time' +TIMER_TIME_PROCESS_TIME = 'time.process_time' +TIMER_TIME_PERF_COUNTER = 'time.perf_counter' TIMER_TIME_CLOCK = 'time.clock' TIMER_SYSTIMES_PROCESSTIME = 'systimes.processtime' # Choose platform default timer -if sys.platform[:3] == 'win': +if hasattr(time, 'perf_counter'): + TIMER_PLATFORM_DEFAULT = TIMER_TIME_PERF_COUNTER +elif sys.platform[:3] == 'win': # On WinXP this has 2.5ms resolution TIMER_PLATFORM_DEFAULT = TIMER_TIME_CLOCK else: @@ -93,6 +99,10 @@ def get_timer(timertype): if timertype == TIMER_TIME_TIME: return time.time + elif timertype == TIMER_TIME_PROCESS_TIME: + return time.process_time + elif timertype == TIMER_TIME_PERF_COUNTER: + return time.perf_counter elif timertype == TIMER_TIME_CLOCK: return time.clock elif timertype == TIMER_SYSTIMES_PROCESSTIME: @@ -107,6 +117,7 @@ def get_machine_details(): print('Getting machine details...') buildno, builddate = platform.python_build() python = platform.python_version() + # XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+? if sys.maxunicode == 65535: # UCS2 build (standard) unitype = 'UCS2' @@ -865,7 +876,18 @@ python pybench.py -s p25.pybench -c p21.pybench print('* using timer: systimes.processtime (%s)' % \ systimes.SYSTIMES_IMPLEMENTATION) else: + # Check that the clock function does exist + try: + get_timer(timer) + except TypeError: + print("* Error: Unknown timer: %s" % timer) + return + print('* using timer: %s' % timer) + if hasattr(time, 'get_clock_info'): + info = time.get_clock_info(timer[5:]) + print('* timer: resolution=%s, implementation=%s' + % (info.resolution, info.implementation)) print() @@ -943,8 +965,6 @@ python pybench.py -s p25.pybench -c p21.pybench pickle.dump(bench,f) f.close() except IOError as reason: - print('* Error opening/writing reportfile') - except IOError as reason: print('* Error opening/writing reportfile %s: %s' % ( reportfile, reason)) diff --git a/Tools/pybench/systimes.py b/Tools/pybench/systimes.py index 6bc7e80..5e00891 100644..100755 --- a/Tools/pybench/systimes.py +++ b/Tools/pybench/systimes.py @@ -5,7 +5,7 @@ This module implements various different strategies for measuring performance timings. It tries to choose the best available method - based on the platforma and available tools. + based on the platform and available tools. On Windows, it is recommended to have the Mark Hammond win32 package installed. Alternatively, the Thomas Heller ctypes diff --git a/Tools/pynche/DetailsViewer.py b/Tools/pynche/DetailsViewer.py index fdc79b7..bed11f4 100644 --- a/Tools/pynche/DetailsViewer.py +++ b/Tools/pynche/DetailsViewer.py @@ -26,7 +26,7 @@ option menu: other side. Thus if red were at 238 and 25 were added to it, red would have the value 7. - Preseve Distance + Preserve Distance When the increment or decrement would send any of the tied variations out of bounds, all tied variations are wrapped as one, so as to preserve the distance between them. 
Thus if green and blue were tied, diff --git a/Tools/scripts/README b/Tools/scripts/README index 8c02529..d65d1fd 100644 --- a/Tools/scripts/README +++ b/Tools/scripts/README @@ -15,7 +15,7 @@ db2pickle.py Dump a database file to a pickle diff.py Print file diffs in context, unified, or ndiff formats dutree.py Format du(1) output as a tree sorted by size eptags.py Create Emacs TAGS file for Python modules -find_recursionlimit.py Find the maximum recursion limit on this machine +find_recursionlimit.py Find the maximum recursion limit on this machine finddiv.py A grep-like tool that looks for division operators findlinksto.py Recursively find symbolic links to a given path prefix findnocoding.py Find source files which need an encoding declaration @@ -28,6 +28,7 @@ ftpmirror.py FTP mirror script google.py Open a webbrowser with Google gprof2html.py Transform gprof(1) output into useful HTML h2py.py Translate #define's into Python assignments +highlight.py Python syntax highlighting with HTML output idle3 Main program to start IDLE ifdef.py Remove #if(n)def groups from C sources lfcr.py Change LF line endings to CRLF (Unix to Windows) @@ -53,6 +54,7 @@ redemo.py Basic regular expression demonstration facility reindent.py Change .py files to use 4-space indents reindent-rst.py Fix-up reStructuredText file whitespace rgrep.py Reverse grep through a file (useful for big logfiles) +run_tests.py Run the test suite with more sensible default options serve.py Small wsgiref-based web server, used in make serve in Doc suff.py Sort a list of files by suffix svneol.py Set svn:eol-style on all files in directory diff --git a/Tools/scripts/abitype.py b/Tools/scripts/abitype.py index ab0ba42..d6a74a1 100755 --- a/Tools/scripts/abitype.py +++ b/Tools/scripts/abitype.py @@ -126,8 +126,8 @@ typeslots = [ 'tp_cache', 'tp_subclasses', 'tp_weaklist', - 'tp_del' - 'tp_version_tag' + 'tp_del', + 'tp_version_tag', ] # Generate a PyType_Spec definition @@ -194,7 +194,7 @@ if __name__ == '__main__': break start = m.start() end = m.end() - name, fields = get_fields(start, m) + name, fields = get_fields(start, end) tokens[start:end] = [('',make_slots(name, fields))] # Output result to stdout diff --git a/Tools/scripts/diff.py b/Tools/scripts/diff.py index 9efb078..f9b14bf 100755 --- a/Tools/scripts/diff.py +++ b/Tools/scripts/diff.py @@ -9,6 +9,12 @@ """ import sys, os, time, difflib, optparse +from datetime import datetime, timezone + +def file_mtime(path): + t = datetime.fromtimestamp(os.stat(path).st_mtime, + timezone.utc) + return t.astimezone().isoformat() def main(): @@ -30,10 +36,12 @@ def main(): n = options.lines fromfile, tofile = args - fromdate = time.ctime(os.stat(fromfile).st_mtime) - todate = time.ctime(os.stat(tofile).st_mtime) - fromlines = open(fromfile, 'U').readlines() - tolines = open(tofile, 'U').readlines() + fromdate = file_mtime(fromfile) + todate = file_mtime(tofile) + with open(fromfile, 'U') as ff: + fromlines = ff.readlines() + with open(tofile, 'U') as tf: + tolines = tf.readlines() if options.u: diff = difflib.unified_diff(fromlines, tolines, fromfile, tofile, fromdate, todate, n=n) diff --git a/Tools/scripts/find_recursionlimit.py b/Tools/scripts/find_recursionlimit.py index 7a86603..1171146 100755 --- a/Tools/scripts/find_recursionlimit.py +++ b/Tools/scripts/find_recursionlimit.py @@ -89,6 +89,12 @@ def test_cpickle(_cache={}): _pickle.Pickler(io.BytesIO(), protocol=-1).dump(l) _cache[n] = l +def test_compiler_recursion(): + # The compiler uses a scaling factor to support additional 
levels + # of recursion. This is a sanity check of that scaling to ensure + # it still raises RuntimeError even at higher recursion limits + compile("()" * (10 * sys.getrecursionlimit()), "<single>", "single") + def check_limit(n, test_func_name): sys.setrecursionlimit(n) if test_func_name.startswith("test_"): @@ -117,5 +123,6 @@ if __name__ == '__main__': check_limit(limit, "test_getattr") check_limit(limit, "test_getitem") check_limit(limit, "test_cpickle") + check_limit(limit, "test_compiler_recursion") print("Limit of %d is fine" % limit) limit = limit + 100 diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py index a494a48..5f3795e 100755 --- a/Tools/scripts/findnocoding.py +++ b/Tools/scripts/findnocoding.py @@ -2,7 +2,7 @@ """List all those Python files that require a coding directive -Usage: nocoding.py dir1 [dir2...] +Usage: findnocoding.py dir1 [dir2...] """ __author__ = "Oleg Broytmann, Georg Brandl" @@ -32,13 +32,14 @@ except ImportError: "no sophisticated Python source file search will be done.", file=sys.stderr) -decl_re = re.compile(r"coding[=:]\s*([-\w.]+)") +decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') +blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)') def get_declaration(line): - match = decl_re.search(line) + match = decl_re.match(line) if match: return match.group(1) - return '' + return b'' def has_correct_encoding(text, codec): try: @@ -50,21 +51,21 @@ def has_correct_encoding(text, codec): def needs_declaration(fullpath): try: - infile = open(fullpath, 'rU') + infile = open(fullpath, 'rb') except IOError: # Oops, the file was removed - ignore it return None - line1 = infile.readline() - line2 = infile.readline() + with infile: + line1 = infile.readline() + line2 = infile.readline() - if get_declaration(line1) or get_declaration(line2): - # the file does have an encoding declaration, so trust it - infile.close() - return False + if (get_declaration(line1) or + blank_re.match(line1) and get_declaration(line2)): + # the file does have an encoding declaration, so trust it + return False - # check the whole file for non utf-8 characters - rest = infile.read() - infile.close() + # check the whole file for non utf-8 characters + rest = infile.read() if has_correct_encoding(line1+line2+rest, "utf-8"): return False diff --git a/Tools/scripts/fixnotice.py b/Tools/scripts/fixnotice.py index aac8697..ad967f9 100755 --- a/Tools/scripts/fixnotice.py +++ b/Tools/scripts/fixnotice.py @@ -2,7 +2,7 @@ """(Ostensibly) fix copyright notices in files. -Actually, this sript will simply replace a block of text in a file from one +Actually, this script will simply replace a block of text in a file from one string to another. It will only do this once though, i.e. not globally throughout the file. It writes a backup file and then does an os.rename() dance for atomicity. diff --git a/Tools/scripts/gprof2html.py b/Tools/scripts/gprof2html.py index 6c899d9..ad82835 100755 --- a/Tools/scripts/gprof2html.py +++ b/Tools/scripts/gprof2html.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python32.3 +#! 
/usr/bin/env python3 """Transform gprof(1) output into useful HTML.""" diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py new file mode 100755 index 0000000..aff5cae --- /dev/null +++ b/Tools/scripts/highlight.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +'''Add syntax highlighting to Python source code''' + +__author__ = 'Raymond Hettinger' + +import keyword, tokenize, cgi, re, functools +try: + import builtins +except ImportError: + import __builtin__ as builtins + +#### Analyze Python Source ################################# + +def is_builtin(s): + 'Return True if s is the name of a builtin' + return hasattr(builtins, s) + +def combine_range(lines, start, end): + 'Join content from a range of lines between start and end' + (srow, scol), (erow, ecol) = start, end + if srow == erow: + return lines[srow-1][scol:ecol], end + rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]] + return ''.join(rows), end + +def analyze_python(source): + '''Generate and classify chunks of Python for syntax highlighting. + Yields tuples in the form: (category, categorized_text). + ''' + lines = source.splitlines(True) + lines.append('') + readline = functools.partial(next, iter(lines), '') + kind = tok_str = '' + tok_type = tokenize.COMMENT + written = (1, 0) + for tok in tokenize.generate_tokens(readline): + prev_tok_type, prev_tok_str = tok_type, tok_str + tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok + kind = '' + if tok_type == tokenize.COMMENT: + kind = 'comment' + elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@': + kind = 'operator' + elif tok_type == tokenize.STRING: + kind = 'string' + if prev_tok_type == tokenize.INDENT or scol==0: + kind = 'docstring' + elif tok_type == tokenize.NAME: + if tok_str in ('def', 'class', 'import', 'from'): + kind = 'definition' + elif prev_tok_str in ('def', 'class'): + kind = 'defname' + elif keyword.iskeyword(tok_str): + kind = 'keyword' + elif is_builtin(tok_str) and prev_tok_str != '.': + kind = 'builtin' + if kind: + text, written = combine_range(lines, written, (srow, scol)) + yield '', text + text, written = tok_str, (erow, ecol) + yield kind, text + line_upto_token, written = combine_range(lines, written, (erow, ecol)) + yield '', line_upto_token + +#### Raw Output ########################################### + +def raw_highlight(classified_text): + 'Straight text display of text classifications' + result = [] + for kind, text in classified_text: + result.append('%15s: %r\n' % (kind or 'plain', text)) + return ''.join(result) + +#### ANSI Output ########################################### + +default_ansi = { + 'comment': ('\033[0;31m', '\033[0m'), + 'string': ('\033[0;32m', '\033[0m'), + 'docstring': ('\033[0;32m', '\033[0m'), + 'keyword': ('\033[0;33m', '\033[0m'), + 'builtin': ('\033[0;35m', '\033[0m'), + 'definition': ('\033[0;33m', '\033[0m'), + 'defname': ('\033[0;34m', '\033[0m'), + 'operator': ('\033[0;33m', '\033[0m'), +} + +def ansi_highlight(classified_text, colors=default_ansi): + 'Add syntax highlighting to source code using ANSI escape sequences' + # http://en.wikipedia.org/wiki/ANSI_escape_code + result = [] + for kind, text in classified_text: + opener, closer = colors.get(kind, ('', '')) + result += [opener, text, closer] + return ''.join(result) + +#### HTML Output ########################################### + +def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'): + 'Convert classified text to an HTML fragment' + result = [opener] + 
for kind, text in classified_text: + if kind: + result.append('<span class="%s">' % kind) + result.append(cgi.escape(text)) + if kind: + result.append('</span>') + result.append(closer) + return ''.join(result) + +default_css = { + '.comment': '{color: crimson;}', + '.string': '{color: forestgreen;}', + '.docstring': '{color: forestgreen; font-style:italic;}', + '.keyword': '{color: darkorange;}', + '.builtin': '{color: purple;}', + '.definition': '{color: darkorange; font-weight:bold;}', + '.defname': '{color: blue;}', + '.operator': '{color: brown;}', +} + +default_html = '''\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" + "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<meta http-equiv="Content-type" content="text/html;charset=UTF-8"> +<title> {title} </title> +<style type="text/css"> +{css} +</style> +</head> +<body> +{body} +</body> +</html> +''' + +def build_html_page(classified_text, title='python', + css=default_css, html=default_html): + 'Create a complete HTML page with colorized source code' + css_str = '\n'.join(['%s %s' % item for item in css.items()]) + result = html_highlight(classified_text) + title = cgi.escape(title) + return html.format(title=title, css=css_str, body=result) + +#### LaTeX Output ########################################## + +default_latex_commands = { + 'comment': '{\color{red}#1}', + 'string': '{\color{ForestGreen}#1}', + 'docstring': '{\emph{\color{ForestGreen}#1}}', + 'keyword': '{\color{orange}#1}', + 'builtin': '{\color{purple}#1}', + 'definition': '{\color{orange}#1}', + 'defname': '{\color{blue}#1}', + 'operator': '{\color{brown}#1}', +} + +default_latex_document = r''' +\documentclass{article} +\usepackage{alltt} +\usepackage{upquote} +\usepackage{color} +\usepackage[usenames,dvipsnames]{xcolor} +\usepackage[cm]{fullpage} +%(macros)s +\begin{document} +\center{\LARGE{%(title)s}} +\begin{alltt} +%(body)s +\end{alltt} +\end{document} +''' + +def alltt_escape(s): + 'Replace backslash and braces with their escaped equivalents' + xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'} + return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s) + +def latex_highlight(classified_text, title = 'python', + commands = default_latex_commands, + document = default_latex_document): + 'Create a complete LaTeX document with colorized source code' + macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items()) + result = [] + for kind, text in classified_text: + if kind: + result.append(r'\py%s{' % kind) + result.append(alltt_escape(text)) + if kind: + result.append('}') + return default_latex_document % dict(title=title, macros=macros, body=''.join(result)) + + +if __name__ == '__main__': + import sys, argparse, webbrowser, os, textwrap + + parser = argparse.ArgumentParser( + description = 'Add syntax highlighting to Python source code', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog = textwrap.dedent(''' + examples: + + # Show syntax highlighted code in the terminal window + $ ./highlight.py myfile.py + + # Colorize myfile.py and display in a browser + $ ./highlight.py -b myfile.py + + # Create an HTML section to embed in an existing webpage + ./highlight.py -s myfile.py + + # Create a complete HTML file + $ ./highlight.py -c myfile.py > myfile.html + + # Create a PDF using LaTeX + $ ./highlight.py -l myfile.py | pdflatex + + ''')) + parser.add_argument('sourcefile', metavar = 'SOURCEFILE', + help = 'file containing Python sourcecode') + parser.add_argument('-b', '--browser', action = 'store_true', + help = 
'launch a browser to show results') + parser.add_argument('-c', '--complete', action = 'store_true', + help = 'build a complete html webpage') + parser.add_argument('-l', '--latex', action = 'store_true', + help = 'build a LaTeX document') + parser.add_argument('-r', '--raw', action = 'store_true', + help = 'raw parse of categorized text') + parser.add_argument('-s', '--section', action = 'store_true', + help = 'show an HTML section rather than a complete webpage') + args = parser.parse_args() + + if args.section and (args.browser or args.complete): + parser.error('The -s/--section option is incompatible with ' + 'the -b/--browser or -c/--complete options') + + sourcefile = args.sourcefile + with open(sourcefile) as f: + source = f.read() + classified_text = analyze_python(source) + + if args.raw: + encoded = raw_highlight(classified_text) + elif args.complete or args.browser: + encoded = build_html_page(classified_text, title=sourcefile) + elif args.section: + encoded = html_highlight(classified_text) + elif args.latex: + encoded = latex_highlight(classified_text, title=sourcefile) + else: + encoded = ansi_highlight(classified_text) + + if args.browser: + htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html' + with open(htmlfile, 'w') as f: + f.write(encoded) + webbrowser.open('file://' + os.path.abspath(htmlfile)) + else: + sys.stdout.write(encoded) diff --git a/Tools/scripts/ifdef.py b/Tools/scripts/ifdef.py index 46167ad..b1711ce 100755 --- a/Tools/scripts/ifdef.py +++ b/Tools/scripts/ifdef.py @@ -9,11 +9,11 @@ # options. On standard output it writes a copy of the input file(s) # minus those code sections that are suppressed by the selected # combination of defined/undefined symbols. The #if(n)def/#else/#else -# lines themselfs (if the #if(n)def tests for one of the mentioned +# lines themselves (if the #if(n)def tests for one of the mentioned # names) are removed as well. # Features: Arbitrary nesting of recognized and unrecognized -# preprocesor statements works correctly. Unrecognized #if* commands +# preprocessor statements works correctly. Unrecognized #if* commands # are left in place, so it will never remove too much, only too # little. It does accept whitespace around the '#' character. 
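
Since highlight.py's classifier is a plain generator, it can be exercised without any of the output back ends. A rough sketch of what it yields (assuming highlight.py is importable):

    from highlight import analyze_python
    for kind, text in analyze_python("def spam():\n    return 42  # answer\n"):
        if kind:
            print(kind, repr(text))
    # roughly: definition 'def', defname 'spam', keyword 'return',
    # comment '# answer'
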
diff --git a/Tools/scripts/import_diagnostics.py b/Tools/scripts/import_diagnostics.py new file mode 100755 index 0000000..c907221 --- /dev/null +++ b/Tools/scripts/import_diagnostics.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +"""Miscellaneous diagnostics for the import system""" + +import sys +import argparse +from pprint import pprint + +def _dump_state(args): + print(sys.version) + for name in args.attributes: + print("sys.{}:".format(name)) + pprint(getattr(sys, name)) + +def _add_dump_args(cmd): + cmd.add_argument("attributes", metavar="ATTR", nargs="+", + help="sys module attribute to display") + +COMMANDS = ( + ("dump", "Dump import state", _dump_state, _add_dump_args), +) + +def _make_parser(): + parser = argparse.ArgumentParser() + sub = parser.add_subparsers(title="Commands") + for name, description, implementation, add_args in COMMANDS: + cmd = sub.add_parser(name, help=description) + cmd.set_defaults(command=implementation) + add_args(cmd) + return parser + +def main(args): + parser = _make_parser() + args = parser.parse_args(args) + return args.command(args) + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/Tools/scripts/patchcheck.py b/Tools/scripts/patchcheck.py index 6a39145..6f9821b 100755 --- a/Tools/scripts/patchcheck.py +++ b/Tools/scripts/patchcheck.py @@ -49,29 +49,15 @@ def mq_patches_applied(): @status("Getting the list of files that have been added/changed", info=lambda x: n_files_str(len(x))) def changed_files(): - """Get the list of changed or added files from the VCS.""" - if os.path.isdir(os.path.join(SRCDIR, '.hg')): - vcs = 'hg' - cmd = 'hg status --added --modified --no-status' - if mq_patches_applied(): - cmd += ' --rev qparent' - elif os.path.isdir('.svn'): - vcs = 'svn' - cmd = 'svn status --quiet --non-interactive --ignore-externals' - else: + """Get the list of changed or added files from Mercurial.""" + if not os.path.isdir(os.path.join(SRCDIR, '.hg')): sys.exit('need a checkout to get modified files') - st = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) - try: - st.wait() - if vcs == 'hg': - return [x.decode().rstrip() for x in st.stdout] - else: - output = (x.decode().rstrip().rsplit(None, 1)[-1] - for x in st.stdout if x[0] in b'AM') - return set(path for path in output if os.path.isfile(path)) - finally: - st.stdout.close() + cmd = 'hg status --added --modified --no-status' + if mq_patches_applied(): + cmd += ' --rev qparent' + with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as st: + return [x.decode().rstrip() for x in st.stdout] def report_modified_files(file_paths): @@ -89,10 +75,8 @@ def report_modified_files(file_paths): def normalize_whitespace(file_paths): """Make sure that the whitespace for .py files have been normalized.""" reindent.makebackup = False # No need to create backups. 
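
The changed_files() rewrite above leans on subprocess.Popen's context-manager support (added in Python 3.2); the same pattern in isolation (assumes a Mercurial checkout with hg on PATH):

    import subprocess
    cmd = 'hg status --added --modified --no-status'
    with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as st:
        changed = [line.decode().rstrip() for line in st.stdout]
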
- fixed = [] - for path in (x for x in file_paths if x.endswith('.py')): - if reindent.check(os.path.join(SRCDIR, path)): - fixed.append(path) + fixed = [path for path in file_paths if path.endswith('.py') and + reindent.check(os.path.join(SRCDIR, path))] return fixed @@ -140,21 +124,37 @@ def docs_modified(file_paths): @status("Misc/ACKS updated", modal=True) def credit_given(file_paths): """Check if Misc/ACKS has been changed.""" - return 'Misc/ACKS' in file_paths + return os.path.join('Misc', 'ACKS') in file_paths @status("Misc/NEWS updated", modal=True) def reported_news(file_paths): """Check if Misc/NEWS has been changed.""" - return 'Misc/NEWS' in file_paths + return os.path.join('Misc', 'NEWS') in file_paths +@status("configure regenerated", modal=True, info=str) +def regenerated_configure(file_paths): + """Check if configure has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'configure' in file_paths else "no" + else: + return "not needed" + +@status("pyconfig.h.in regenerated", modal=True, info=str) +def regenerated_pyconfig_h_in(file_paths): + """Check if pyconfig.h.in has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'pyconfig.h.in' in file_paths else "no" + else: + return "not needed" def main(): file_paths = changed_files() python_files = [fn for fn in file_paths if fn.endswith('.py')] c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))] doc_files = [fn for fn in file_paths if fn.startswith('Doc')] - special_files = {'Misc/ACKS', 'Misc/NEWS'} & set(file_paths) + misc_files = {os.path.join('Misc', 'ACKS'), os.path.join('Misc', 'NEWS')}\ + & set(file_paths) # PEP 8 whitespace rules enforcement. normalize_whitespace(python_files) # C rules enforcement. @@ -164,9 +164,13 @@ def main(): # Docs updated. docs_modified(doc_files) # Misc/ACKS changed. - credit_given(special_files) + credit_given(misc_files) # Misc/NEWS changed. - reported_news(special_files) + reported_news(misc_files) + # Regenerated configure, if necessary. + regenerated_configure(file_paths) + # Regenerated pyconfig.h.in, if necessary. + regenerated_pyconfig_h_in(file_paths) # Test suite run and passed. 
if python_files or c_files: diff --git a/Tools/scripts/pathfix.py b/Tools/scripts/pathfix.py index dd08e0a..13ca866 100755 --- a/Tools/scripts/pathfix.py +++ b/Tools/scripts/pathfix.py @@ -151,7 +151,7 @@ def fix(filename): except os.error as msg: err('%s: reset of timestamp failed (%r)\n' % (filename, msg)) return 1 - # Return succes + # Return success return 0 def fixline(line): diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py index 048131e..69e8e0d 100755 --- a/Tools/scripts/pysource.py +++ b/Tools/scripts/pysource.py @@ -22,7 +22,7 @@ __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_pytho import os, re -binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]') +binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]') debug = False @@ -42,7 +42,7 @@ def _open(fullpath): return None try: - return open(fullpath, 'rU') + return open(fullpath, "rb") except IOError as err: # Access denied, or a special file - ignore it print_debug("%s: access denied: %s" % (fullpath, err)) return None @@ -55,8 +55,8 @@ def looks_like_python(fullpath): if infile is None: return False - line = infile.readline() - infile.close() + with infile: + line = infile.readline() if binary_re.search(line): # file appears to be binary @@ -65,7 +65,7 @@ def looks_like_python(fullpath): if fullpath.endswith(".py") or fullpath.endswith(".pyw"): return True - elif "python" in line: + elif b"python" in line: # disguised Python script (e.g. CGI) return True @@ -76,8 +76,8 @@ def can_be_compiled(fullpath): if infile is None: return False - code = infile.read() - infile.close() + with infile: + code = infile.read() try: compile(code, fullpath, "exec") diff --git a/Tools/scripts/pyvenv b/Tools/scripts/pyvenv new file mode 100755 index 0000000..978d691 --- /dev/null +++ b/Tools/scripts/pyvenv @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +if __name__ == '__main__': + import sys + rc = 1 + try: + import venv + venv.main() + rc = 0 + except Exception as e: + print('Error: %s' % e, file=sys.stderr) + sys.exit(rc) diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index b18993b..4a916ea 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -8,6 +8,8 @@ -r (--recurse) Recurse. Search for all .py files in subdirectories too. -n (--nobackup) No backup. Does not make a ".bak" file before reindenting. -v (--verbose) Verbose. Print informative msgs; else no output. + (--newline) Newline. Specify the newline character to use (CRLF, LF). + Default is the same as the original file. -h (--help) Help. Print this usage information and exit. Change Python (.py) files to use 4-space indents and no hard tab characters. 
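
The pysource.py hunks earlier in this patch move the file sniffing to binary
mode, so undecodable files can no longer raise while being read before the
binary check runs, and both the control-character regex and the "python"
probe now operate on bytes. A condensed, standalone sketch of the resulting
detection logic:

import re

# control bytes that essentially never occur in text files
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')

def looks_like_python(fullpath):
    with open(fullpath, 'rb') as infile:   # binary read, as in the patch
        line = infile.readline()           # shebang or first source line
    if binary_re.search(line):
        return False                       # file appears to be binary
    if fullpath.endswith(('.py', '.pyw')):
        return True
    return b'python' in line               # disguised script, e.g. a CGI

print(looks_like_python(__file__))         # the sketch inspects its own source
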
@@ -50,6 +52,8 @@ verbose = False recurse = False dryrun = False makebackup = True +spec_newline = None +"""A specified newline to be used in the output (set by --newline option)""" def usage(msg=None): @@ -62,13 +66,12 @@ def errprint(*args): sys.stderr.write(" ".join(str(arg) for arg in args)) sys.stderr.write("\n") - def main(): import getopt - global verbose, recurse, dryrun, makebackup + global verbose, recurse, dryrun, makebackup, spec_newline try: opts, args = getopt.getopt(sys.argv[1:], "drnvh", - ["dryrun", "recurse", "nobackup", "verbose", "help"]) + ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) except getopt.error as msg: usage(msg) return @@ -81,6 +84,11 @@ def main(): makebackup = False elif o in ('-v', '--verbose'): verbose = True + elif o in ('--newline',): + if not a.upper() in ('CRLF', 'LF'): + usage() + return + spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] elif o in ('-h', '--help'): usage() return @@ -118,9 +126,9 @@ def check(file): errprint("%s: I/O Error: %s" % (file, str(msg))) return - newline = r.newlines + newline = spec_newline if spec_newline else r.newlines if isinstance(newline, tuple): - errprint("%s: mixed newlines detected; cannot process file" % file) + errprint("%s: mixed newlines detected; cannot continue without --newline" % file) return if r.run(): diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py new file mode 100644 index 0000000..a6c5da3 --- /dev/null +++ b/Tools/scripts/run_tests.py @@ -0,0 +1,57 @@ +"""Run Python's test suite in a fast, rigorous way. + +The defaults are meant to be reasonably thorough, while skipping certain +tests that can be time-consuming or resource-intensive (e.g. largefile), +or distracting (e.g. audio and gui). These defaults can be overridden by +simply passing a -u option to this script. + +""" + +import os +import sys +import test.support +try: + import threading +except ImportError: + threading = None + + +def is_multiprocess_flag(arg): + return arg.startswith('-j') or arg.startswith('--multiprocess') + + +def is_resource_use_flag(arg): + return arg.startswith('-u') or arg.startswith('--use') + + +def main(regrtest_args): + args = [sys.executable, + '-W', 'default', # Warnings set to 'default' + '-bb', # Warnings about bytes/bytearray + '-E', # Ignore environment variables + ] + # Allow user-specified interpreter options to override our defaults. 
+    args.extend(test.support.args_from_interpreter_flags())
+
+    # Workaround for issue #20355
+    os.environ.pop("PYTHONWARNINGS", None)
+    # Workaround for issue #20361
+    args.extend(['-W', 'error::BytesWarning'])
+
+    args.extend(['-m', 'test',    # Run the test suite
+                 '-r',            # Randomize test order
+                 '-w',            # Re-run failed tests in verbose mode
+                 ])
+    if sys.platform == 'win32':
+        args.append('-n')         # Silence alerts under Windows
+    if threading and not any(is_multiprocess_flag(arg) for arg in regrtest_args):
+        args.extend(['-j', '0'])  # Use all CPU cores
+    if not any(is_resource_use_flag(arg) for arg in regrtest_args):
+        args.extend(['-u', 'all,-largefile,-audio,-gui'])
+    args.extend(regrtest_args)
+    print(' '.join(args))
+    os.execv(sys.executable, args)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/Tools/scripts/serve.py b/Tools/scripts/serve.py
index 68c25f0..dae21f2 100755
--- a/Tools/scripts/serve.py
+++ b/Tools/scripts/serve.py
@@ -22,7 +22,7 @@ def app(environ, respond):
             return util.FileWrapper(open(fn, "rb"))
         else:
             respond('404 Not Found', [('Content-Type', 'text/plain')])
-            return ['not found']
+            return [b'not found']
 
 if __name__ == '__main__':
     path = sys.argv[1]
diff --git a/Tools/scripts/texi2html.py b/Tools/scripts/texi2html.py
index af2147a..9983528 100755
--- a/Tools/scripts/texi2html.py
+++ b/Tools/scripts/texi2html.py
@@ -319,7 +319,7 @@ class TexinfoParser:
 
     # Start saving text in a buffer instead of writing it to a file
     def startsaving(self):
-        if self.savetext != None:
+        if self.savetext is not None:
             self.savestack.append(self.savetext)
             # print '*** Recursively saving text, expect trouble'
         self.savetext = ''
@@ -341,7 +341,7 @@ class TexinfoParser:
         except:
             print(args)
             raise TypeError
-        if self.savetext != None:
+        if self.savetext is not None:
             self.savetext = self.savetext + text
         elif self.nodefp:
             self.nodefp.write(text)
@@ -350,7 +350,7 @@ class TexinfoParser:
 
     # Complete the current node -- write footnotes and close file
     def endnode(self):
-        if self.savetext != None:
+        if self.savetext is not None:
             print('*** Still saving text at end of node')
             dummy = self.collectsavings()
         if self.footnotes:
@@ -804,7 +804,7 @@ class TexinfoParser:
     def close_i(self): self.write('</I>')
 
     def open_footnote(self):
-        # if self.savetext <> None:
+        # if self.savetext is not None:
         #     print '*** Recursive footnote -- expect weirdness'
         id = len(self.footnotes) + 1
         self.write(self.FN_SOURCE_PATTERN % {'id': repr(id)})
@@ -1442,7 +1442,7 @@ class TexinfoParser:
         else:
             # some other character, e.g. '-'
             args = self.itemarg + ' ' + args
-        if self.itemnumber != None:
+        if self.itemnumber is not None:
             args = self.itemnumber + '. ' + args
             self.itemnumber = increment(self.itemnumber)
         if self.stack and self.stack[-1] == 'table':
diff --git a/Tools/ssl/make_ssl_data.py b/Tools/ssl/make_ssl_data.py
new file mode 100755
index 0000000..10244d1
--- /dev/null
+++ b/Tools/ssl/make_ssl_data.py
@@ -0,0 +1,68 @@
+#! /usr/bin/env python3
+
+"""
+This script should be called *manually* when we want to upgrade SSLError
+`library` and `reason` mnemonics to a more recent OpenSSL version.
+
+It takes two arguments:
+- the path to the OpenSSL include files' directory
+  (e.g. openssl-1.0.1-beta3/include/openssl/)
+- the path to the C file to be generated
+  (probably Modules/_ssl_data.h)
+"""
+
+import datetime
+import os
+import re
+import sys
+
+
+def parse_error_codes(h_file, prefix):
+    pat = re.compile(r"#define\W+(%s([\w]+))\W+(\d+)\b" % re.escape(prefix))
+    codes = []
+    with open(h_file, "r", encoding="latin1") as f:
+        for line in f:
+            match = pat.search(line)
+            if match:
+                code, name, num = match.groups()
+                num = int(num)
+                codes.append((code, name, num))
+    return codes
+
+if __name__ == "__main__":
+    openssl_inc = sys.argv[1]
+    outfile = sys.argv[2]
+    use_stdout = outfile == '-'
+    f = sys.stdout if use_stdout else open(outfile, "w")
+    error_libraries = (
+        # (library code, mnemonic, error prefix, header file)
+        ('ERR_LIB_PEM', 'PEM', 'PEM_R_', 'pem.h'),
+        ('ERR_LIB_SSL', 'SSL', 'SSL_R_', 'ssl.h'),
+        ('ERR_LIB_X509', 'X509', 'X509_R_', 'x509.h'),
+        )
+    def w(l):
+        f.write(l + "\n")
+    w("/* File generated by Tools/ssl/make_ssl_data.py */")
+    w("/* Generated on %s */" % datetime.datetime.now().isoformat())
+    w("")
+    w("static struct py_ssl_library_code library_codes[] = {")
+    for libcode, mnemo, _, _ in error_libraries:
+        w('    {"%s", %s},' % (mnemo, libcode))
+    w('    { NULL }')
+    w('};')
+    w("")
+    w("static struct py_ssl_error_code error_codes[] = {")
+    for libcode, _, prefix, h_file in error_libraries:
+        codes = parse_error_codes(os.path.join(openssl_inc, h_file), prefix)
+        for code, name, num in sorted(codes):
+            w('  #ifdef %s' % (code))
+            w('    {"%s", %s, %s},' % (name, libcode, code))
+            w('  #else')
+            w('    {"%s", %s, %d},' % (name, libcode, num))
+            w('  #endif')
+    w('    { NULL }')
+    w('};')
+    if not use_stdout:
+        f.close()
diff --git a/Tools/stringbench/README b/Tools/stringbench/README
new file mode 100644
index 0000000..a271f12
--- /dev/null
+++ b/Tools/stringbench/README
@@ -0,0 +1,68 @@
+stringbench is a set of performance tests comparing byte string
+operations with unicode operations. The two string implementations
+are loosely based on each other and sometimes the algorithm for one is
+faster than the other.
+
+This test set was started at the Need For Speed sprint in Reykjavik
+to identify which string methods could be sped up quickly and to
+identify obvious places for improvement.
+
+Here is an example of a benchmark:
+
+
+@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
+def startswith_single(STR):
+    s1 = STR("Andrew")
+    s2 = STR("A")
+    s1_startswith = s1.startswith
+    for x in _RANGE_1000:
+        s1_startswith(s2)
+
+The bench decorator takes three parameters. The first is a short
+description of how the code works. In most cases this is a Python code
+snippet. It is not the code which is actually run, because the real
+code is hand-optimized to focus on the method being tested.
+
+The second parameter is a group title. All benchmarks with the same
+group title are listed together. This lets you compare different
+implementations of the same algorithm, such as "t in s"
+vs. "s.find(t)".
+
+The last is a count. Each benchmark loops over the algorithm either
+100 or 1000 times, depending on the algorithm performance. The output
+time is the time per benchmark call, so the reader needs a way to know
+how to scale the performance.
+
+These parameters become function attributes.
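
Before the sample output below, here is a self-contained sketch (not part
of the patch) of how such attribute-carrying decorators can drive a timing
loop; the driver shown is purely illustrative, while the BenchTimer class
stringbench actually uses appears near the end of stringbench.py:

import timeit

def bench(comment, group, repeat_count):
    def decorate(f):
        # attach the metadata as function attributes, as described above
        f.comment = comment
        f.group = group
        f.repeat_count = repeat_count
        f.is_bench = True
        return f
    return decorate

@bench('"A" in "A"*1000', 'early match, single character', 1000)
def in_quick_match(STR):
    s1 = STR('A' * 1000)
    s2 = STR('A')
    for x in range(1000):
        s2 in s1

# a driver can discover marked functions and report per-call milliseconds
secs = timeit.timeit(lambda: in_quick_match(str), number=10) / 10
print('%8.2f  %s (*%d)' % (secs * 1000, in_quick_match.comment,
                           in_quick_match.repeat_count))
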
+ + +Here is an example of the output + + +========== count newlines +38.54 41.60 92.7 ...text.with.2000.newlines.count("\n") (*100) +========== early match, single character +1.14 1.18 96.8 ("A"*1000).find("A") (*1000) +0.44 0.41 105.6 "A" in "A"*1000 (*1000) +1.15 1.17 98.1 ("A"*1000).index("A") (*1000) + +The first column is the run time in milliseconds for byte strings. +The second is the run time for unicode strings. The third is a +percentage; byte time / unicode time. It's the percentage by which +unicode is faster than byte strings. + +The last column contains the code snippet and the repeat count for the +internal benchmark loop. + +The times are computed with 'timeit.py' which repeats the test more +and more times until the total time takes over 0.2 seconds, returning +the best time for a single iteration. + +The final line of the output is the cumulative time for byte and +unicode strings, and the overall performance of unicode relative to +bytes. For example + +4079.83 5432.25 75.1 TOTAL + +However, this has no meaning as it evenly weights every test. + diff --git a/Tools/stringbench/stringbench.py b/Tools/stringbench/stringbench.py new file mode 100644 index 0000000..142b3ca --- /dev/null +++ b/Tools/stringbench/stringbench.py @@ -0,0 +1,1482 @@ + +# Various microbenchmarks comparing unicode and byte string performance +# Please keep this file both 2.x and 3.x compatible! + +import timeit +import itertools +import operator +import re +import sys +import datetime +import optparse + +VERSION = '2.0' + +def p(*args): + sys.stdout.write(' '.join(str(s) for s in args) + '\n') + +if sys.version_info >= (3,): + BYTES = bytes_from_str = lambda x: x.encode('ascii') + UNICODE = unicode_from_str = lambda x: x +else: + BYTES = bytes_from_str = lambda x: x + UNICODE = unicode_from_str = lambda x: x.decode('ascii') + +class UnsupportedType(TypeError): + pass + + +p('stringbench v%s' % VERSION) +p(sys.version) +p(datetime.datetime.now()) + +REPEAT = 1 +REPEAT = 3 +#REPEAT = 7 + +if __name__ != "__main__": + raise SystemExit("Must run as main program") + +parser = optparse.OptionParser() +parser.add_option("-R", "--skip-re", dest="skip_re", + action="store_true", + help="skip regular expression tests") +parser.add_option("-8", "--8-bit", dest="bytes_only", + action="store_true", + help="only do 8-bit string benchmarks") +parser.add_option("-u", "--unicode", dest="unicode_only", + action="store_true", + help="only do Unicode string benchmarks") + + +_RANGE_1000 = list(range(1000)) +_RANGE_100 = list(range(100)) +_RANGE_10 = list(range(10)) + +dups = {} +def bench(s, group, repeat_count): + def blah(f): + if f.__name__ in dups: + raise AssertionError("Multiple functions with same name: %r" % + (f.__name__,)) + dups[f.__name__] = 1 + f.comment = s + f.is_bench = True + f.group = group + f.repeat_count = repeat_count + return f + return blah + +def uses_re(f): + f.uses_re = True + +####### 'in' comparisons + +@bench('"A" in "A"*1000', "early match, single character", 1000) +def in_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + for x in _RANGE_1000: + s2 in s1 + +@bench('"B" in "A"*1000', "no match, single character", 1000) +def in_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + for x in _RANGE_1000: + s2 in s1 + + +@bench('"AB" in "AB"*1000', "early match, two characters", 1000) +def in_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + for x in _RANGE_1000: + s2 in s1 + +@bench('"BC" in "AB"*1000', "no 
match, two characters", 1000) +def in_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + for x in _RANGE_1000: + s2 in s1 + +@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000) +def in_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + for x in _RANGE_1000: + s2 in s1 + +@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")', + "late match, 100 characters", 100) +def in_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*300 + m+e + s2 = m+e + for x in _RANGE_100: + s2 in s1 + +# Try with regex +@uses_re +@bench('s="ABC"*33; re.compile(s+"D").search((s+"D")*300+s+"E")', + "late match, 100 characters", 100) +def re_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*300 + m+e + s2 = m+e + pat = re.compile(s2) + search = pat.search + for x in _RANGE_100: + search(s1) + + +#### same tests as 'in' but use 'find' + +@bench('("A"*1000).find("A")', "early match, single character", 1000) +def find_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("A"*1000).find("B")', "no match, single character", 1000) +def find_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + + +@bench('("AB"*1000).find("AB")', "early match, two characters", 1000) +def find_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*1000).find("BC")', "no match, two characters", 1000) +def find_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*1000).find("CA")', "no match, two characters", 1000) +def find_test_no_match_two_character_bis(STR): + s1 = STR("AB" * 1000) + s2 = STR("CA") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000) +def find_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000) +def find_test_slow_match_two_characters_bis(STR): + s1 = STR("AB" * 300+"CA") + s2 = STR("CA") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")', + "late match, 100 characters", 100) +def find_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_find = s1.find + for x in _RANGE_100: + s1_find(s2) + +@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)', + "late match, 100 characters", 100) +def find_test_slow_match_100_characters_bis(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + e+m + s2 = e+m + s1_find = s1.find + for x in _RANGE_100: + s1_find(s2) + + +#### Same tests for 'rfind' + +@bench('("A"*1000).rfind("A")', "early match, single character", 1000) +def rfind_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("A"*1000).rfind("B")', "no match, single character", 1000) +def rfind_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rfind = s1.rfind + for x in 
_RANGE_1000: + s1_rfind(s2) + + +@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000) +def rfind_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000) +def rfind_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000) +def rfind_test_no_match_two_character_bis(STR): + s1 = STR("AB" * 1000) + s2 = STR("CA") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters_bis(STR): + s1 = STR("BC" + "AB" * 300) + s2 = STR("BC") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e+m + (d+m)*500 + s2 = e+m + s1_rfind = s1.rfind + for x in _RANGE_100: + s1_rfind(s2) + +@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters_bis(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = m+e + (d+m)*500 + s2 = m+e + s1_rfind = s1.rfind + for x in _RANGE_100: + s1_rfind(s2) + + +#### Now with index. +# Skip the ones which fail because that would include exception overhead. 
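+# (For reference: a failing lookup such as STR("A"*1000).index("B") raises
+# ValueError, so timing it would mostly measure exception handling rather
+# than the search itself; that is why this group has no "no match" cases.)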
+ +@bench('("A"*1000).index("A")', "early match, single character", 1000) +def index_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_index = s1.index + for x in _RANGE_1000: + s1_index(s2) + +@bench('("AB"*1000).index("AB")', "early match, two characters", 1000) +def index_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_index = s1.index + for x in _RANGE_1000: + s1_index(s2) + +@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000) +def index_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_index = s1.index + for x in _RANGE_1000: + s1_index(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")', + "late match, 100 characters", 100) +def index_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_index = s1.index + for x in _RANGE_100: + s1_index(s2) + + +#### Same for rindex + +@bench('("A"*1000).rindex("A")', "early match, single character", 1000) +def rindex_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000) +def rindex_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000) +def rindex_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)', + "late match, 100 characters", 100) +def rindex_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e + m + (d+m)*500 + s2 = e + m + s1_rindex = s1.rindex + for x in _RANGE_100: + s1_rindex(s2) + + +#### Same for partition + +@bench('("A"*1000).partition("A")', "early match, single character", 1000) +def partition_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("A"*1000).partition("B")', "no match, single character", 1000) +def partition_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + + +@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000) +def partition_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000) +def partition_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000) +def partition_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")', + "late match, 100 characters", 100) +def partition_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_partition = s1.partition + for x in _RANGE_100: + s1_partition(s2) + + +#### Same 
for rpartition + +@bench('("A"*1000).rpartition("A")', "early match, single character", 1000) +def rpartition_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("A"*1000).rpartition("B")', "no match, single character", 1000) +def rpartition_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + + +@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000) +def rpartition_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000) +def rpartition_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000) +def rpartition_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)', + "late match, 100 characters", 100) +def rpartition_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e + m + (d+m)*500 + s2 = e + m + s1_rpartition = s1.rpartition + for x in _RANGE_100: + s1_rpartition(s2) + + +#### Same for split(s, 1) + +@bench('("A"*1000).split("A", 1)', "early match, single character", 1000) +def split_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("A"*1000).split("B", 1)', "no match, single character", 1000) +def split_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + + +@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000) +def split_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000) +def split_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000) +def split_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)', + "late match, 100 characters", 100) +def split_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = (m+d)*500 + m+e + s2 = m+e + s1_split = s1.split + for x in _RANGE_100: + s1_split(s2, 1) + + +#### Same for rsplit(s, 1) + +@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000) +def rsplit_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000) +def rsplit_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + + 
+@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000) +def rsplit_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000) +def rsplit_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000) +def rsplit_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)', + "late match, 100 characters", 100) +def rsplit_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + d = STR("D") + e = STR("E") + s1 = e + m + (d+m)*500 + s2 = e + m + s1_rsplit = s1.rsplit + for x in _RANGE_100: + s1_rsplit(s2, 1) + + +#### Benchmark the operator-based methods + +@bench('"A"*10', "repeat 1 character 10 times", 1000) +def repeat_single_10_times(STR): + s = STR("A") + for x in _RANGE_1000: + s * 10 + +@bench('"A"*1000', "repeat 1 character 1000 times", 1000) +def repeat_single_1000_times(STR): + s = STR("A") + for x in _RANGE_1000: + s * 1000 + +@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000) +def repeat_5_10_times(STR): + s = STR("ABCDE") + for x in _RANGE_1000: + s * 10 + +@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000) +def repeat_5_1000_times(STR): + s = STR("ABCDE") + for x in _RANGE_1000: + s * 1000 + +# + for concat + +@bench('"Andrew"+"Dalke"', "concat two strings", 1000) +def concat_two_strings(STR): + s1 = STR("Andrew") + s2 = STR("Dalke") + for x in _RANGE_1000: + s1+s2 + +@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15", + 1000) +def concat_many_strings(STR): + s1=STR('TIXSGYNREDCVBHJ') + s2=STR('PUMTLXBZVDO') + s3=STR('FVZNJ') + s4=STR('OGDXUW') + s5=STR('WEIMRNCOYVGHKB') + s6=STR('FCQTNMXPUZH') + s7=STR('TICZJYRLBNVUEAK') + s8=STR('REYB') + s9=STR('PWUOQ') + s10=STR('EQHCMKBS') + s11=STR('AEVDFOH') + s12=STR('IFHVD') + s13=STR('JGTCNLXWOHQ') + s14=STR('ITSKEPYLROZAWXF') + s15=STR('THEK') + s16=STR('GHPZFBUYCKMNJIT') + s17=STR('JMUZ') + s18=STR('WLZQMTB') + s19=STR('KPADCBW') + s20=STR('TNJHZQAGBU') + for x in _RANGE_1000: + (s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+ + s11+s12+s13+s14+s15+s16+s17+s18+s19+s20) + + +#### Benchmark join + +def get_bytes_yielding_seq(STR, arg): + if STR is BYTES and sys.version_info >= (3,): + raise UnsupportedType + return STR(arg) + +@bench('"A".join("")', + "join empty string, with 1 character sep", 100) +def join_empty_single(STR): + sep = STR("A") + s2 = get_bytes_yielding_seq(STR, "") + sep_join = sep.join + for x in _RANGE_100: + sep_join(s2) + +@bench('"ABCDE".join("")', + "join empty string, with 5 character sep", 100) +def join_empty_5(STR): + sep = STR("ABCDE") + s2 = get_bytes_yielding_seq(STR, "") + sep_join = sep.join + for x in _RANGE_100: + sep_join(s2) + +@bench('"A".join("ABC..Z")', + "join string with 26 characters, with 1 character sep", 1000) +def join_alphabet_single(STR): + sep = STR("A") + s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ") + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"ABCDE".join("ABC..Z")', + "join string with 26 characters, with 5 character sep", 1000) +def join_alphabet_5(STR): + sep = STR("ABCDE") + s2 = get_bytes_yielding_seq(STR, 
"ABCDEFGHIJKLMnOPQRSTUVWXYZ") + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"A".join(list("ABC..Z"))', + "join list of 26 characters, with 1 character sep", 1000) +def join_alphabet_list_single(STR): + sep = STR("A") + s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"] + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"ABCDE".join(list("ABC..Z"))', + "join list of 26 characters, with 5 character sep", 1000) +def join_alphabet_list_five(STR): + sep = STR("ABCDE") + s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"] + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"A".join(["Bob"]*100))', + "join list of 100 words, with 1 character sep", 1000) +def join_100_words_single(STR): + sep = STR("A") + s2 = [STR("Bob")]*100 + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +@bench('"ABCDE".join(["Bob"]*100))', + "join list of 100 words, with 5 character sep", 1000) +def join_100_words_5(STR): + sep = STR("ABCDE") + s2 = [STR("Bob")]*100 + sep_join = sep.join + for x in _RANGE_1000: + sep_join(s2) + +#### split tests + +@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000) +def whitespace_split(STR): + s = STR("Here are some words. "*2) + s_split = s.split + for x in _RANGE_1000: + s_split() + +@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000) +def whitespace_rsplit(STR): + s = STR("Here are some words. "*2) + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit() + +@bench('("Here are some words. "*2).split(None, 1)', + "split 1 whitespace", 1000) +def whitespace_split_1(STR): + s = STR("Here are some words. "*2) + s_split = s.split + N = None + for x in _RANGE_1000: + s_split(N, 1) + +@bench('("Here are some words. "*2).rsplit(None, 1)', + "split 1 whitespace", 1000) +def whitespace_rsplit_1(STR): + s = STR("Here are some words. "*2) + s_rsplit = s.rsplit + N = None + for x in _RANGE_1000: + s_rsplit(N, 1) + +@bench('("Here are some words. "*2).partition(" ")', + "split 1 whitespace", 1000) +def whitespace_partition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_partition = s.partition + for x in _RANGE_1000: + s_partition(sep) + +@bench('("Here are some words. "*2).rpartition(" ")', + "split 1 whitespace", 1000) +def whitespace_rpartition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_rpartition = s.rpartition + for x in _RANGE_1000: + s_rpartition(sep) + +human_text = """\ +Python is a dynamic object-oriented programming language that can be +used for many kinds of software development. It offers strong support +for integration with other languages and tools, comes with extensive +standard libraries, and can be learned in a few days. Many Python +programmers report substantial productivity gains and feel the language +encourages the development of higher quality, more maintainable code. + +Python runs on Windows, Linux/Unix, Mac OS X, OS/2, Amiga, Palm +Handhelds, and Nokia mobile phones. Python has also been ported to the +Java and .NET virtual machines. + +Python is distributed under an OSI-approved open source license that +makes it free to use, even for commercial products. 
+"""*25 +human_text_bytes = bytes_from_str(human_text) +human_text_unicode = unicode_from_str(human_text) +def _get_human_text(STR): + if STR is UNICODE: + return human_text_unicode + if STR is BYTES: + return human_text_bytes + raise AssertionError + +@bench('human_text.split()', "split whitespace (huge)", 10) +def whitespace_split_huge(STR): + s = _get_human_text(STR) + s_split = s.split + for x in _RANGE_10: + s_split() + +@bench('human_text.rsplit()', "split whitespace (huge)", 10) +def whitespace_rsplit_huge(STR): + s = _get_human_text(STR) + s_rsplit = s.rsplit + for x in _RANGE_10: + s_rsplit() + + + +@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000) +def newlines_split(STR): + s = STR("this\nis\na\ntest\n") + s_split = s.split + nl = STR("\n") + for x in _RANGE_1000: + s_split(nl) + + +@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000) +def newlines_rsplit(STR): + s = STR("this\nis\na\ntest\n") + s_rsplit = s.rsplit + nl = STR("\n") + for x in _RANGE_1000: + s_rsplit(nl) + +@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000) +def newlines_splitlines(STR): + s = STR("this\nis\na\ntest\n") + s_splitlines = s.splitlines + for x in _RANGE_1000: + s_splitlines() + +## split text with 2000 newlines + +def _make_2000_lines(): + import random + r = random.Random(100) + chars = list(map(chr, range(32, 128))) + i = 0 + while i < len(chars): + chars[i] = " " + i += r.randrange(9) + s = "".join(chars) + s = s*4 + words = [] + for i in range(2000): + start = r.randrange(96) + n = r.randint(5, 65) + words.append(s[start:start+n]) + return "\n".join(words)+"\n" + +_text_with_2000_lines = _make_2000_lines() +_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines) +_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines) +def _get_2000_lines(STR): + if STR is UNICODE: + return _text_with_2000_lines_unicode + if STR is BYTES: + return _text_with_2000_lines_bytes + raise AssertionError + + +@bench('"...text...".split("\\n")', "split 2000 newlines", 10) +def newlines_split_2000(STR): + s = _get_2000_lines(STR) + s_split = s.split + nl = STR("\n") + for x in _RANGE_10: + s_split(nl) + +@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10) +def newlines_rsplit_2000(STR): + s = _get_2000_lines(STR) + s_rsplit = s.rsplit + nl = STR("\n") + for x in _RANGE_10: + s_rsplit(nl) + +@bench('"...text...".splitlines()', "split 2000 newlines", 10) +def newlines_splitlines_2000(STR): + s = _get_2000_lines(STR) + s_splitlines = s.splitlines + for x in _RANGE_10: + s_splitlines() + + +## split text on "--" characters +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".split("--")', + "split on multicharacter separator (small)", 1000) +def split_multichar_sep_small(STR): + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_split = s.split + pat = STR("--") + for x in _RANGE_1000: + s_split(pat) +@bench( + '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")', + "split on multicharacter separator (small)", 1000) +def rsplit_multichar_sep_small(STR): + s = STR("this--is--a--test--of--the--emergency--broadcast--system") + s_rsplit = s.rsplit + pat = STR("--") + for x in _RANGE_1000: + s_rsplit(pat) + +## split dna text on "ACTAT" characters +@bench('dna.split("ACTAT")', + "split on multicharacter separator (dna)", 10) +def split_multichar_sep_dna(STR): + s = _get_dna(STR) + s_split = s.split + pat = STR("ACTAT") + for x in _RANGE_10: + s_split(pat) + 
+@bench('dna.rsplit("ACTAT")', + "split on multicharacter separator (dna)", 10) +def rsplit_multichar_sep_dna(STR): + s = _get_dna(STR) + s_rsplit = s.rsplit + pat = STR("ACTAT") + for x in _RANGE_10: + s_rsplit(pat) + + + +## split with limits + +GFF3_example = "\t".join([ + "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".", + "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"]) + +@bench('GFF3_example.split("\\t")', "tab split", 1000) +def tab_split_no_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_split = s.split + for x in _RANGE_1000: + s_split(sep) + +@bench('GFF3_example.split("\\t", 8)', "tab split", 1000) +def tab_split_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_split = s.split + for x in _RANGE_1000: + s_split(sep, 8) + +@bench('GFF3_example.rsplit("\\t")', "tab split", 1000) +def tab_rsplit_no_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit(sep) + +@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000) +def tab_rsplit_limit(STR): + sep = STR("\t") + s = STR(GFF3_example) + s_rsplit = s.rsplit + for x in _RANGE_1000: + s_rsplit(sep, 8) + +#### Count characters + +@bench('...text.with.2000.newlines.count("\\n")', + "count newlines", 10) +def count_newlines(STR): + s = _get_2000_lines(STR) + s_count = s.count + nl = STR("\n") + for x in _RANGE_10: + s_count(nl) + +# Orchid sequences concatenated, from Biopython +_dna = """ +CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT +AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG +AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT +TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC +AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG +TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT +CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT +TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT +GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC +TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG +GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA +ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC +CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA +ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA +ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA +TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG +CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG +GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA +ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG +ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC +ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA +GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA +TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG +TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT +TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG +GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG +GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT +AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC 
+GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG +TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT +CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA +TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC +TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC +AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT +GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT +GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA +CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG +GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA +TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG +ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT +GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA +AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC +AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA +ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC +GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC +GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC +AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA +GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG +ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC +GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC +""" +_dna = "".join(_dna.splitlines()) +_dna = _dna * 25 +_dna_bytes = bytes_from_str(_dna) +_dna_unicode = unicode_from_str(_dna) + +def _get_dna(STR): + if STR is UNICODE: + return _dna_unicode + if STR is BYTES: + return _dna_bytes + raise AssertionError + +@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10) +def count_aact(STR): + seq = _get_dna(STR) + seq_count = seq.count + needle = STR("AACT") + for x in _RANGE_10: + seq_count(needle) + +##### startswith and endswith + +@bench('"Andrew".startswith("A")', 'startswith single character', 1000) +def startswith_single(STR): + s1 = STR("Andrew") + s2 = STR("A") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + +@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters', + 1000) +def startswith_multiple(STR): + s1 = STR("Andrew") + s2 = STR("Andrew") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + +@bench('"Andrew".startswith("Anders")', + 'startswith multiple characters - not!', 1000) +def startswith_multiple_not(STR): + s1 = STR("Andrew") + s2 = STR("Anders") + s1_startswith = s1.startswith + for x in _RANGE_1000: + s1_startswith(s2) + + +# endswith + +@bench('"Andrew".endswith("w")', 'endswith single character', 1000) +def endswith_single(STR): + s1 = STR("Andrew") + s2 = STR("w") + s1_endswith = s1.endswith + for x in _RANGE_1000: + s1_endswith(s2) + +@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000) +def endswith_multiple(STR): + s1 = STR("Andrew") + s2 = STR("Andrew") + s1_endswith = s1.endswith + for x in _RANGE_1000: + s1_endswith(s2) + +@bench('"Andrew".endswith("Anders")', + 'endswith multiple characters - not!', 1000) +def endswith_multiple_not(STR): + s1 = STR("Andrew") + s2 = STR("Anders") + s1_endswith = s1.endswith + for x in _RANGE_1000: + s1_endswith(s2) + +#### Strip + +@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000) +def 
terminal_newline_strip_right(STR): + s = STR("Hello!\n") + s_strip = s.strip + for x in _RANGE_1000: + s_strip() + +@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000) +def terminal_newline_rstrip(STR): + s = STR("Hello!\n") + s_rstrip = s.rstrip + for x in _RANGE_1000: + s_rstrip() + +@bench('"\\nHello!".strip()', 'strip terminal newline', 1000) +def terminal_newline_strip_left(STR): + s = STR("\nHello!") + s_strip = s.strip + for x in _RANGE_1000: + s_strip() + +@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000) +def terminal_newline_strip_both(STR): + s = STR("\nHello!\n") + s_strip = s.strip + for x in _RANGE_1000: + s_strip() + +@bench('"\\nHello!".rstrip()', 'strip terminal newline', 1000) +def terminal_newline_lstrip(STR): + s = STR("\nHello!") + s_lstrip = s.lstrip + for x in _RANGE_1000: + s_lstrip() + +@bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s', + 'strip terminal newline', 1000) +def terminal_newline_if_else(STR): + s = STR("Hello!\n") + NL = STR("\n") + for x in _RANGE_1000: + s[:-1] if (s[-1] == NL) else s + + +# Strip multiple spaces or tabs + +@bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000) +def terminal_space_strip(STR): + s = STR("Hello\t \t!") + s_strip = s.strip + for x in _RANGE_1000: + s_strip() + +@bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000) +def terminal_space_rstrip(STR): + s = STR("Hello!\t \t") + s_rstrip = s.rstrip + for x in _RANGE_1000: + s_rstrip() + +@bench('"\\t \\tHello".rstrip()', 'strip terminal spaces and tabs', 1000) +def terminal_space_lstrip(STR): + s = STR("\t \tHello!") + s_lstrip = s.lstrip + for x in _RANGE_1000: + s_lstrip() + + +#### replace +@bench('"This is a test".replace(" ", "\\t")', 'replace single character', + 1000) +def replace_single_character(STR): + s = STR("This is a test!") + from_str = STR(" ") + to_str = STR("\t") + s_replace = s.replace + for x in _RANGE_1000: + s_replace(from_str, to_str) + +@uses_re +@bench('re.sub(" ", "\\t", "This is a test"', 'replace single character', + 1000) +def replace_single_character_re(STR): + s = STR("This is a test!") + pat = re.compile(STR(" ")) + to_str = STR("\t") + pat_sub = pat.sub + for x in _RANGE_1000: + pat_sub(to_str, s) + +@bench('"...text.with.2000.lines...replace("\\n", " ")', + 'replace single character, big string', 10) +def replace_single_character_big(STR): + s = _get_2000_lines(STR) + from_str = STR("\n") + to_str = STR(" ") + s_replace = s.replace + for x in _RANGE_10: + s_replace(from_str, to_str) + +@uses_re +@bench('re.sub("\\n", " ", "...text.with.2000.lines...")', + 'replace single character, big string', 10) +def replace_single_character_big_re(STR): + s = _get_2000_lines(STR) + pat = re.compile(STR("\n")) + to_str = STR(" ") + pat_sub = pat.sub + for x in _RANGE_10: + pat_sub(to_str, s) + + +@bench('dna.replace("ATC", "ATT")', + 'replace multiple characters, dna', 10) +def replace_multiple_characters_dna(STR): + seq = _get_dna(STR) + from_str = STR("ATC") + to_str = STR("ATT") + seq_replace = seq.replace + for x in _RANGE_10: + seq_replace(from_str, to_str) + +# This increases the character count +@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")', + 'replace and expand multiple characters, big string', 10) +def replace_multiple_character_big(STR): + s = _get_2000_lines(STR) + from_str = STR("\n") + to_str = STR("\r\n") + s_replace = s.replace + for x in _RANGE_10: + s_replace(from_str, to_str) + + +# This decreases the character count +@bench('"When shall we three meet 
again?".replace("ee", "")', + 'replace/remove multiple characters', 1000) +def replace_multiple_character_remove(STR): + s = STR("When shall we three meet again?") + from_str = STR("ee") + to_str = STR("") + s_replace = s.replace + for x in _RANGE_1000: + s_replace(from_str, to_str) + + +big_s = "A" + ("Z"*128*1024) +big_s_bytes = bytes_from_str(big_s) +big_s_unicode = unicode_from_str(big_s) +def _get_big_s(STR): + if STR is UNICODE: return big_s_unicode + if STR is BYTES: return big_s_bytes + raise AssertionError + +# The older replace implementation counted all matches in +# the string even when it only needed to make one replacement. +@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)', + 'quick replace single character match', 10) +def quick_replace_single_match(STR): + s = _get_big_s(STR) + from_str = STR("A") + to_str = STR("BB") + s_replace = s.replace + for x in _RANGE_10: + s_replace(from_str, to_str, 1) + +@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)', + 'quick replace multiple character match', 10) +def quick_replace_multiple_match(STR): + s = _get_big_s(STR) + from_str = STR("AZZ") + to_str = STR("BBZZ") + s_replace = s.replace + for x in _RANGE_10: + s_replace(from_str, to_str, 1) + + +#### + +# CCP does a lot of this, for internationalisation of ingame messages. +_format = "The %(thing)s is %(place)s the %(location)s." +_format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", } +_format_bytes = bytes_from_str(_format) +_format_unicode = unicode_from_str(_format) +_format_dict_bytes = dict((bytes_from_str(k), bytes_from_str(v)) for (k,v) in _format_dict.items()) +_format_dict_unicode = dict((unicode_from_str(k), unicode_from_str(v)) for (k,v) in _format_dict.items()) + +def _get_format(STR): + if STR is UNICODE: + return _format_unicode + if STR is BYTES: + if sys.version_info >= (3,): + raise UnsupportedType + return _format_bytes + raise AssertionError + +def _get_format_dict(STR): + if STR is UNICODE: + return _format_dict_unicode + if STR is BYTES: + if sys.version_info >= (3,): + raise UnsupportedType + return _format_dict_bytes + raise AssertionError + +# Formatting. 
+@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}', + 'formatting a string type with a dict', 1000) +def format_with_dict(STR): + s = _get_format(STR) + d = _get_format_dict(STR) + for x in _RANGE_1000: + s % d + + +#### Upper- and lower- case conversion + +@bench('("Where in the world is Carmen San Deigo?"*10).lower()', + "case conversion -- rare", 1000) +def lower_conversion_rare(STR): + s = STR("Where in the world is Carmen San Deigo?"*10) + s_lower = s.lower + for x in _RANGE_1000: + s_lower() + +@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()', + "case conversion -- dense", 1000) +def lower_conversion_dense(STR): + s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10) + s_lower = s.lower + for x in _RANGE_1000: + s_lower() + + +@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()', + "case conversion -- rare", 1000) +def upper_conversion_rare(STR): + s = STR("Where in the world is Carmen San Deigo?"*10) + s_upper = s.upper + for x in _RANGE_1000: + s_upper() + +@bench('("where in the world is carmen san deigo?"*10).upper()', + "case conversion -- dense", 1000) +def upper_conversion_dense(STR): + s = STR("where in the world is carmen san deigo?"*10) + s_upper = s.upper + for x in _RANGE_1000: + s_upper() + + +# end of benchmarks + +################# + +class BenchTimer(timeit.Timer): + def best(self, repeat=1): + for i in range(1, 10): + number = 10**i + x = self.timeit(number) + if x > 0.02: + break + times = [x] + for i in range(1, repeat): + times.append(self.timeit(number)) + return min(times) / number + +def main(): + (options, test_names) = parser.parse_args() + if options.bytes_only and options.unicode_only: + raise SystemExit("Only one of --8-bit and --unicode are allowed") + + bench_functions = [] + for (k,v) in globals().items(): + if hasattr(v, "is_bench"): + if test_names: + for name in test_names: + if name in v.group: + break + else: + # Not selected, ignore + continue + if options.skip_re and hasattr(v, "uses_re"): + continue + + bench_functions.append( (v.group, k, v) ) + bench_functions.sort() + + p("bytes\tunicode") + p("(in ms)\t(in ms)\t%\tcomment") + + bytes_total = uni_total = 0.0 + + for title, group in itertools.groupby(bench_functions, + operator.itemgetter(0)): + # Flush buffer before each group + sys.stdout.flush() + p("="*10, title) + for (_, k, v) in group: + if hasattr(v, "is_bench"): + bytes_time = 0.0 + bytes_time_s = " - " + if not options.unicode_only: + try: + bytes_time = BenchTimer("__main__.%s(__main__.BYTES)" % (k,), + "import __main__").best(REPEAT) + bytes_time_s = "%.2f" % (1000 * bytes_time) + bytes_total += bytes_time + except UnsupportedType: + bytes_time_s = "N/A" + uni_time = 0.0 + uni_time_s = " - " + if not options.bytes_only: + try: + uni_time = BenchTimer("__main__.%s(__main__.UNICODE)" % (k,), + "import __main__").best(REPEAT) + uni_time_s = "%.2f" % (1000 * uni_time) + uni_total += uni_time + except UnsupportedType: + uni_time_s = "N/A" + try: + average = bytes_time/uni_time + except (TypeError, ZeroDivisionError): + average = 0.0 + p("%s\t%s\t%.1f\t%s (*%d)" % ( + bytes_time_s, uni_time_s, 100.*average, + v.comment, v.repeat_count)) + + if bytes_total == uni_total == 0.0: + p("That was zippy!") + else: + try: + ratio = bytes_total/uni_total + except ZeroDivisionError: + ratio = 0.0 + p("%.2f\t%.2f\t%.1f\t%s" % ( + 1000*bytes_total, 1000*uni_total, 100.*ratio, + "TOTAL")) + +if __name__ == "__main__": + main() diff --git a/Tools/test2to3/maintest.py b/Tools/test2to3/maintest.py index 
036dd4f..036dd4f 100644..100755 --- a/Tools/test2to3/maintest.py +++ b/Tools/test2to3/maintest.py diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py index 01303db..6525ddf 100644..100755 --- a/Tools/unicode/comparecodecs.py +++ b/Tools/unicode/comparecodecs.py @@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2): print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)) mismatch = 0 # Check encoding - for i in range(sys.maxunicode): + for i in range(sys.maxunicode+1): u = chr(i) try: c1 = u.encode(encoding1) diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py index 7e7d6d0..f5a1af3 100644 --- a/Tools/unicode/gencodec.py +++ b/Tools/unicode/gencodec.py @@ -102,7 +102,7 @@ def readmap(filename): comment = '' else: comment = comment[1:].strip() - if enc < 256: + if not isinstance(enc, tuple) and enc < 256: if enc in unmapped: unmapped.remove(enc) if enc == uni: @@ -202,11 +202,10 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): # Analyze map and create table dict mappings = sorted(map.items()) table = {} - maxkey = 0 + maxkey = 255 if 'IDENTITY' in map: for key in range(256): table[key] = (key, '') - maxkey = 255 del map['IDENTITY'] for mapkey, mapvalue in mappings: mapcomment = '' @@ -224,6 +223,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): return None # Create table code + maxchar = 0 for key in range(maxkey + 1): if key not in table: mapvalue = MISSING_CODE @@ -238,6 +238,7 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): return None else: mapchar = chr(mapvalue) + maxchar = max(maxchar, ord(mapchar)) if mapcomment and comments: append(' %a \t# %s -> %s' % (mapchar, hexrepr(key, key_precision), @@ -245,6 +246,8 @@ def python_tabledef_code(varname, map, comments=1, key_precision=2): else: append(' %a' % mapchar) + if maxchar < 256: + append(' %a \t## Widen to UCS2 for optimization' % UNI_UNDEFINED) append(')') return l diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index d503190..bc3d0cb 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -21,17 +21,23 @@ # 2004-05-29 perky add east asian width information # 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta # 2008-06-11 gb add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch +# 2011-10-21 ezio add support for name aliases and named sequences +# 2012-01 benjamin add full case mappings # # written by Fredrik Lundh (fredrik@pythonware.com) # -import sys, os, zipfile +import os +import sys +import zipfile + +from textwrap import dedent SCRIPT = sys.argv[0] VERSION = "3.2" # The Unicode Database -UNIDATA_VERSION = "6.0.0" +UNIDATA_VERSION = "6.1.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" EASTASIAN_WIDTH = "EastAsianWidth%s.txt" @@ -39,6 +45,19 @@ UNIHAN = "Unihan%s.zip" DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt" DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt" LINE_BREAK = "LineBreak%s.txt" +NAME_ALIASES = "NameAliases%s.txt" +NAMED_SEQUENCES = "NamedSequences%s.txt" +SPECIAL_CASING = "SpecialCasing%s.txt" +CASE_FOLDING = "CaseFolding%s.txt" + +# Private Use Areas -- in planes 1, 15, 16 +PUA_1 = range(0xE000, 0xF900) +PUA_15 = range(0xF0000, 0xFFFFE) +PUA_16 = range(0x100000, 0x10FFFE) + +# we use this ranges of PUA_15 to store name aliases and named sequences +NAME_ALIASES_START = 0xF0000 +NAMED_SEQUENCES_START = 0xF0200 old_versions = ["3.2.0"] @@ -67,13 
+86,15 @@ UPPER_MASK = 0x80 XID_START_MASK = 0x100 XID_CONTINUE_MASK = 0x200 PRINTABLE_MASK = 0x400 -NODELTA_MASK = 0x800 -NUMERIC_MASK = 0x1000 +NUMERIC_MASK = 0x800 +CASE_IGNORABLE_MASK = 0x1000 +CASED_MASK = 0x2000 +EXTENDED_CASE_MASK = 0x4000 # these ranges need to match unicodedata.c:is_unified_ideograph cjk_ranges = [ ('3400', '4DB5'), - ('4E00', '9FCB'), + ('4E00', '9FCC'), ('20000', '2A6D6'), ('2A700', '2B734'), ('2B740', '2B81D') @@ -367,6 +388,7 @@ def makeunicodetype(unicode, trace): numeric = {} spaces = [] linebreaks = [] + extra_casing = [] for char in unicode.chars: record = unicode.table[char] @@ -379,7 +401,7 @@ def makeunicodetype(unicode, trace): delta = True if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]: flags |= ALPHA_MASK - if category == "Ll": + if "Lowercase" in properties: flags |= LOWER_MASK if 'Line_Break' in properties or bidirectional == "B": flags |= LINEBREAK_MASK @@ -389,7 +411,7 @@ spaces.append(char) if category == "Lt": flags |= TITLE_MASK - if category == "Lu": + if "Uppercase" in properties: flags |= UPPER_MASK if char == ord(" ") or category[0] not in ("C", "Z"): flags |= PRINTABLE_MASK @@ -397,7 +419,12 @@ flags |= XID_START_MASK if "XID_Continue" in properties: flags |= XID_CONTINUE_MASK - # use delta predictor for upper/lower/title if it fits + if "Cased" in properties: + flags |= CASED_MASK + if "Case_Ignorable" in properties: + flags |= CASE_IGNORABLE_MASK + sc = unicode.special_casing.get(char) + cf = unicode.case_folding.get(char, [char]) if record[12]: upper = int(record[12], 16) else: @@ -409,23 +436,39 @@ if record[14]: title = int(record[14], 16) else: - # UCD.html says that a missing title char means that - # it defaults to the uppercase character, not to the - # character itself. Apparently, in the current UCD (5.x) - # this feature is never used title = upper - upper_d = upper - char - lower_d = lower - char - title_d = title - char - if -32768 <= upper_d <= 32767 and \ - -32768 <= lower_d <= 32767 and \ - -32768 <= title_d <= 32767: - # use deltas - upper = upper_d & 0xffff - lower = lower_d & 0xffff - title = title_d & 0xffff + if sc is None and cf != [lower]: + sc = ([lower], [title], [upper]) + if sc is None: + if upper == lower == title: + upper = lower = title = 0 + else: + upper = upper - char + lower = lower - char + title = title - char + assert (abs(upper) <= 2147483647 and + abs(lower) <= 2147483647 and + abs(title) <= 2147483647) else: - flags |= NODELTA_MASK + # This happens either when some character maps to more than one + # character in uppercase, lowercase, or titlecase, or when the + # casefolded version of the character differs from the + # lowercase one. The extra characters are stored in a separate + # array. + flags |= EXTENDED_CASE_MASK + lower = len(extra_casing) | (len(sc[0]) << 24) + extra_casing.extend(sc[0]) + if cf != sc[0]: + lower |= len(cf) << 20 + extra_casing.extend(cf) + upper = len(extra_casing) | (len(sc[2]) << 24) + extra_casing.extend(sc[2]) + # Title is probably equal to upper. 
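+ # (Layout of the packed values, for reference: the low bits hold an + # index into the generated _PyUnicode_ExtendedCase array, bits 24 and + # up hold the length of the mapping, and bits 20-23 of `lower` hold + # the length of the casefolded mapping when it differs from the + # lowercase one; the folded code points are stored right after the + # lowercase ones. Title re-uses the upper slot below when the two + # mappings coincide.)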
+ if sc[1] == sc[2]: + title = upper + else: + title = len(extra_casing) | (len(sc[1]) << 24) + extra_casing.extend(sc[1]) # decimal digit, integer digit decimal = 0 if record[6]: @@ -452,6 +495,7 @@ def makeunicodetype(unicode, trace): print(sum(map(len, numeric.values())), "numeric code points") print(len(spaces), "whitespace code points") print(len(linebreaks), "linebreak code points") + print(len(extra_casing), "extended case array") print("--- Writing", FILE, "...") @@ -465,6 +509,14 @@ def makeunicodetype(unicode, trace): print("};", file=fp) print(file=fp) + print("/* extended case mappings */", file=fp) + print(file=fp) + print("const Py_UCS4 _PyUnicode_ExtendedCase[] = {", file=fp) + for c in extra_casing: + print(" %d," % c, file=fp) + print("};", file=fp) + print(file=fp) + # split decomposition index table index1, index2, shift = splitbins(index, trace) @@ -692,6 +744,39 @@ def makeunicodename(unicode, trace): print("/* name->code dictionary */", file=fp) codehash.dump(fp, trace) + print(file=fp) + print('static const unsigned int aliases_start = %#x;' % + NAME_ALIASES_START, file=fp) + print('static const unsigned int aliases_end = %#x;' % + (NAME_ALIASES_START + len(unicode.aliases)), file=fp) + + print('static const unsigned int name_aliases[] = {', file=fp) + for name, codepoint in unicode.aliases: + print(' 0x%04X,' % codepoint, file=fp) + print('};', file=fp) + + # In Unicode 6.0.0, the sequences contain at most 4 BMP chars, + # so we are using Py_UCS2 seq[4]. This needs to be updated if longer + # sequences or sequences with non-BMP chars are added. + # unicodedata_lookup should be adapted too. + print(dedent(""" + typedef struct NamedSequence { + int seqlen; + Py_UCS2 seq[4]; + } named_sequence; + """), file=fp) + + print('static const unsigned int named_sequences_start = %#x;' % + NAMED_SEQUENCES_START, file=fp) + print('static const unsigned int named_sequences_end = %#x;' % + (NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=fp) + + print('static const named_sequence named_sequences[] = {', file=fp) + for name, sequence in unicode.named_sequences: + seq_str = ', '.join('0x%04X' % cp for cp in sequence) + print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp) + print('};', file=fp) + fp.close() @@ -726,7 +811,11 @@ def merge_old_version(version, new, old): for k in range(len(old.table[i])): if old.table[i][k] != new.table[i][k]: value = old.table[i][k] - if k == 2: + if k == 1 and i in PUA_15: + # the name is not set in the old.table, but in the + # new.table we are using it for aliases and named seq + assert value == '' + elif k == 2: #print "CATEGORY",hex(i), old.table[i][k], new.table[i][k] category_changes[i] = CATEGORY_NAMES.index(value) elif k == 4: @@ -816,15 +905,15 @@ class UnicodeData: expand=1, cjk_check=True): self.changed = [] - file = open_data(UNICODE_DATA, version) table = [None] * 0x110000 - while 1: - s = file.readline() - if not s: - break - s = s.strip().split(";") - char = int(s[0], 16) - table[char] = s + with open_data(UNICODE_DATA, version) as file: + while 1: + s = file.readline() + if not s: + break + s = s.strip().split(";") + char = int(s[0], 16) + table[char] = s cjk_ranges_found = [] @@ -855,32 +944,79 @@ class UnicodeData: self.table = table self.chars = list(range(0x110000)) # unicode 3.2 - file = open_data(COMPOSITION_EXCLUSIONS, version) + # check for name aliases and named sequences, see #12753 + # aliases and named sequences are not in 3.2.0 + if version != '3.2.0': + self.aliases = [] + # store aliases in the Private 
Use Area 15, in range U+F0000..U+F00FF, + # in order to take advantage of the compression and lookup + # algorithms used for the other characters + pua_index = NAME_ALIASES_START + with open_data(NAME_ALIASES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + char, name, abbrev = s.split(';') + char = int(char, 16) + self.aliases.append((name, char)) + # also store the name in the PUA 15 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAME_ALIASES_START == len(self.aliases) + + self.named_sequences = [] + # store named sequences in the PUA 15, in range U+F0100.., + # in order to take advantage of the compression and lookup + # algorithms used for the other characters. + + assert pua_index < NAMED_SEQUENCES_START + pua_index = NAMED_SEQUENCES_START + with open_data(NAMED_SEQUENCES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + name, chars = s.split(';') + chars = tuple(int(char, 16) for char in chars.split()) + # check that the structure defined in makeunicodename is OK + assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size" + assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in " + "the NamedSequence struct and in unicodedata_lookup") + self.named_sequences.append((name, chars)) + # also store these in the PUA 15 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences) + self.exclusions = {} - for s in file: - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - char = int(s.split()[0],16) - self.exclusions[char] = 1 + with open_data(COMPOSITION_EXCLUSIONS, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + char = int(s.split()[0],16) + self.exclusions[char] = 1 widths = [None] * 0x110000 - for s in open_data(EASTASIAN_WIDTH, version): - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - s = s.split()[0].split(';') - if '..' in s[0]: - first, last = [int(c, 16) for c in s[0].split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(s[0], 16)] - for char in chars: - widths[char] = s[1] + with open_data(EASTASIAN_WIDTH, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + s = s.split()[0].split(';') + if '..' in s[0]: + first, last = [int(c, 16) for c in s[0].split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(s[0], 16)] + for char in chars: + widths[char] = s[1] + for i in range(0, 0x110000): if table[i] is not None: table[i].append(widths[i]) @@ -888,36 +1024,39 @@ for i in range(0, 0x110000): if table[i] is not None: table[i].append(set()) - for s in open_data(DERIVED_CORE_PROPERTIES, version): - s = s.split('#', 1)[0].strip() - if not s: - continue - r, p = s.split(";") - r = r.strip() - p = p.strip() - if ".." in r: - first, last = [int(c, 16) for c in r.split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(r, 16)] - for char in chars: - if table[char]: - # Some properties (e.g. Default_Ignorable_Code_Point) - # apply to unassigned code points; ignore them - table[char][-1].add(p) - - for s in open_data(LINE_BREAK, version): - s = s.partition('#')[0] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: - continue - if '..
not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - table[char][-1].add('Line_Break') + with open_data(DERIVED_CORE_PROPERTIES, version) as file: + for s in file: + s = s.split('#', 1)[0].strip() + if not s: + continue + + r, p = s.split(";") + r = r.strip() + p = p.strip() + if ".." in r: + first, last = [int(c, 16) for c in r.split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(r, 16)] + for char in chars: + if table[char]: + # Some properties (e.g. Default_Ignorable_Code_Point) + # apply to unassigned code points; ignore them + table[char][-1].add(p) + + with open_data(LINE_BREAK, version) as file: + for s in file: + s = s.partition('#')[0] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: + continue + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + table[char][-1].add('Line_Break') # We only want the quickcheck properties # Format: NF?_QC; Y(es)/N(o)/M(aybe) @@ -928,31 +1067,33 @@ class UnicodeData: # for older versions, and no delta records will be created. quickchecks = [0] * 0x110000 qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split() - for s in open_data(DERIVEDNORMALIZATION_PROPS, version): - if '#' in s: - s = s[:s.index('#')] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in qc_order: - continue - quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No - quickcheck_shift = qc_order.index(s[1])*2 - quickcheck <<= quickcheck_shift - if '..' not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - assert not (quickchecks[char]>>quickcheck_shift)&3 - quickchecks[char] |= quickcheck + with open_data(DERIVEDNORMALIZATION_PROPS, version) as file: + for s in file: + if '#' in s: + s = s[:s.index('#')] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in qc_order: + continue + quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No + quickcheck_shift = qc_order.index(s[1])*2 + quickcheck <<= quickcheck_shift + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + assert not (quickchecks[char]>>quickcheck_shift)&3 + quickchecks[char] |= quickcheck for i in range(0, 0x110000): if table[i] is not None: table[i].append(quickchecks[i]) - zip = zipfile.ZipFile(open_data(UNIHAN, version)) - if version == '3.2.0': - data = zip.open('Unihan-3.2.0.txt').read() - else: - data = zip.open('Unihan_NumericValues.txt').read() + with open_data(UNIHAN, version) as file: + zip = zipfile.ZipFile(file) + if version == '3.2.0': + data = zip.open('Unihan-3.2.0.txt').read() + else: + data = zip.open('Unihan_NumericValues.txt').read() for line in data.decode("utf-8").splitlines(): if not line.startswith('U+'): continue @@ -965,6 +1106,34 @@ class UnicodeData: # Patch the numeric field if table[i] is not None: table[i][8] = value + sc = self.special_casing = {} + with open_data(SPECIAL_CASING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[4]: + # We ignore all conditionals (since they depend on + # languages) except for one, which is hardcoded. See + # handle_capital_sigma in unicodeobject.c. 
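+ # (data[4] is the conditions column, i.e. the fifth + # semicolon-separated field of a SpecialCasing.txt entry.)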
+ continue + c = int(data[0], 16) + lower = [int(char, 16) for char in data[1].split()] + title = [int(char, 16) for char in data[2].split()] + upper = [int(char, 16) for char in data[3].split()] + sc[c] = (lower, title, upper) + cf = self.case_folding = {} + if version != '3.2.0': + with open_data(CASE_FOLDING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[1] in "CF": + c = int(data[0], 16) + cf[c] = [int(char, 16) for char in data[2].split()] def uselatin1(self): # restrict character range to ISO Latin 1 diff --git a/Tools/unicode/mkstringprep.py b/Tools/unicode/mkstringprep.py index 868f5cd..ead020c 100644 --- a/Tools/unicode/mkstringprep.py +++ b/Tools/unicode/mkstringprep.py @@ -1,4 +1,5 @@ -import re, unicodedata, sys +import re, sys +from unicodedata import ucd_3_2_0 as unicodedata if sys.maxunicode == 65535: raise RuntimeError("need UCS-4 Python") @@ -37,16 +38,20 @@ def compact_set(l): tuple.append((prev,prev+span+1)) else: single.append(prev) - tuple = " + ".join(["list(range(%d,%d))" % t for t in tuple]) + if not single and len(tuple) == 1: + tuple = "range(%d,%d)" % tuple[0] + else: + tuple = " + ".join("list(range(%d,%d))" % t for t in tuple) if not single: return "set(%s)" % tuple if not tuple: - return "set(%s)" % repr(single) - return "set(%s + %s)" % (repr(single),tuple) + return "set(%r)" % (single,) + return "set(%r + %s)" % (single, tuple) ############## Read the tables in the RFC ####################### -data = open("rfc3454.txt").readlines() +with open("rfc3454.txt") as f: + data = f.readlines() tables = [] curname = None @@ -55,8 +60,7 @@ for l in data: if not l: continue # Skip RFC page breaks - if l.startswith("Hoffman & Blanchet") or\ - l.startswith("RFC 3454"): + if l.startswith(("Hoffman & Blanchet", "RFC 3454")): continue # Find start/end lines m = re.match("----- (Start|End) Table ([A-Z](.[0-9])+) -----", l) @@ -71,6 +75,8 @@ for l in data: else: if not curname: raise RuntimeError("End without start", l) + if curname != m.group(2): + raise RuntimeError("Unexpected end", l) curname = None continue if not curname: @@ -113,10 +119,10 @@ There are two kinds of tables: sets, for which a member test is provided, and mappings, for which a mapping function is provided. \"\"\" -import unicodedata +from unicodedata import ucd_3_2_0 as unicodedata """) -print("assert unicodedata.unidata_version == %s" % repr(unicodedata.unidata_version)) +print("assert unicodedata.unidata_version == %r" % (unicodedata.unidata_version,)) # A.1 is the table of unassigned characters # XXX Plane 15 PUA is listed as unassigned in Python. 
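A note on the compact_set() change in the mkstringprep.py hunk above: with the new branch, a run of contiguous code points with no leftover singletons is emitted as a bare range() expression, while mixed inputs keep the old concatenation form. A quick sketch with illustrative inputs (not taken from the real RFC 3454 tables), assuming compact_set() receives a sorted list of integers:

    compact_set([1, 2, 3, 4])        # -> "set(range(1,5))"   (was "set(list(range(1,5)))")
    compact_set([1, 2, 3, 4, 10])    # -> "set([10] + list(range(1,5)))"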
@@ -173,15 +179,15 @@ assert name == "B.3" b3_exceptions = {} for k,v in table_b2.items(): - if map(ord, unichr(k).lower()) != v: - b3_exceptions[k] = u"".join(map(unichr,v)) + if list(map(ord, chr(k).lower())) != v: + b3_exceptions[k] = "".join(map(chr,v)) b3 = sorted(b3_exceptions.items()) print(""" b3_exceptions = {""") -for i,(k,v) in enumerate(b3): - print("0x%x:%s," % (k, repr(v)), end=' ') +for i, kv in enumerate(b3): + print("0x%x:%a," % kv, end=' ') if i % 4 == 3: print() print("}") @@ -224,7 +230,7 @@ print(""" def map_table_b2(a): al = map_table_b3(a) b = unicodedata.normalize("NFKC", al) - bl = u"".join([map_table_b3(ch) for ch in b]) + bl = "".join([map_table_b3(ch) for ch in b]) c = unicodedata.normalize("NFKC", bl) if b != c: return c @@ -240,7 +246,7 @@ assert table == {0x20:0x20} print(""" def in_table_c11(code): - return code == u" " + return code == " " """) # C.1.2 is the rest of all space characters @@ -249,12 +255,12 @@ del tables[0] assert name == "C.1.2" # table = set(table.keys()) -# Zs = set(gen_category(["Zs"])) - set([0x20]) +# Zs = set(gen_category(["Zs"])) - {0x20} # assert Zs == table print(""" def in_table_c12(code): - return unicodedata.category(code) == "Zs" and code != u" " + return unicodedata.category(code) == "Zs" and code != " " def in_table_c11_c12(code): return unicodedata.category(code) == "Zs" diff --git a/Tools/unittestgui/unittestgui.py b/Tools/unittestgui/unittestgui.py index b526646..c3b5fa4 100644..100755 --- a/Tools/unittestgui/unittestgui.py +++ b/Tools/unittestgui/unittestgui.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ GUI framework and application for use with Python unit testing framework. Execute tests written using the framework provided by the 'unittest' module. @@ -28,7 +28,6 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ __author__ = "Steve Purcell (stephen_purcell@yahoo.com)" -__version__ = "$Revision: 1.7 $"[11:-2] import sys import traceback
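A closing note on the extended-case encoding that makeunicodedata.py now emits (the makeunicodetype() hunks above): the sketch below shows how the packed lower value could be decoded back into code points. It is illustrative only -- the helper names are invented here, the 0xFFFF index mask is an assumption, and the actual consumer is the C code in Objects/unicodeobject.c.

    # Unpack values produced when EXTENDED_CASE_MASK is set; `extended_case`
    # stands for the generated _PyUnicode_ExtendedCase array.
    def decode_lower(lower, extended_case):
        index = lower & 0xFFFF               # offset of the lowercase mapping
        n = lower >> 24                      # number of code points in it
        return extended_case[index:index + n]

    def decode_casefold(lower, extended_case):
        n_fold = (lower >> 20) & 0xF         # non-zero only when folding differs
        if n_fold == 0:
            return decode_lower(lower, extended_case)
        start = (lower & 0xFFFF) + (lower >> 24)    # folded code points sit
        return extended_case[start:start + n_fold]  # right after the lowercase ones

The same index/length packing applies to the upper and title slots, minus the folding bits.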