diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2015-09-22 08:20:03 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2015-09-22 08:20:03 (GMT) |
| commit | 5420a19e5c690cb9c96babceac44c26ff6f2167d (patch) | |
| tree | 09d42b476605f7860fbaebb136d4f8e144f14159 | |
| parent | 68d8dd01a2433624b814b9b3fa5a2a7438d4e547 (diff) | |
| parent | 256865fe128c764644470f0250d960bdf1d51925 (diff) | |
| download | tcl-5420a19e5c690cb9c96babceac44c26ff6f2167d.zip tcl-5420a19e5c690cb9c96babceac44c26ff6f2167d.tar.gz tcl-5420a19e5c690cb9c96babceac44c26ff6f2167d.tar.bz2 | |
merge trunk
56 files changed, 1203 insertions, 879 deletions
diff --git a/doc/AddErrInfo.3 b/doc/AddErrInfo.3 index d4bf7d5..e6563a0 100644 --- a/doc/AddErrInfo.3 +++ b/doc/AddErrInfo.3 @@ -4,7 +4,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_AddErrorInfo 3 8.5 Tcl "Tcl Library Procedures" .so man.macros .BS diff --git a/doc/DString.3 b/doc/DString.3 index f7567dc..b93f119 100644 --- a/doc/DString.3 +++ b/doc/DString.3 @@ -4,7 +4,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_DString 3 7.4 Tcl "Tcl Library Procedures" .so man.macros .BS diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 7580ae4..daab91d 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_GetEncoding 3 "8.1" Tcl "Tcl Library Procedures" .so man.macros .BS @@ -70,7 +70,7 @@ desired. .AP "const char" *name in Name of encoding to load. .AP Tcl_Encoding encoding in -The encoding to query, free, or use for converting text. If \fIencoding\fR is +The encoding to query, free, or use for converting text. If \fIencoding\fR is NULL, the current system encoding is used. .AP Tcl_Obj *objPtr in Name of encoding to get token for. @@ -80,17 +80,17 @@ Points to storage where encoding token is to be written. For the \fBTcl_ExternalToUtf\fR functions, an array of bytes in the specified encoding that are to be converted to UTF-8. For the \fBTcl_UtfToExternal\fR and \fBTcl_WinUtfToTChar\fR functions, an array of -UTF-8 characters to be converted to the specified encoding. +UTF-8 characters to be converted to the specified encoding. .AP "const TCHAR" *tsrc in An array of Windows TCHAR characters to convert to UTF-8. -.AP int srcLen in -Length of \fIsrc\fR or \fItsrc\fR in bytes. If the length is negative, the +.AP int srcLen in +Length of \fIsrc\fR or \fItsrc\fR in bytes. If the length is negative, the encoding-specific length of the string is used. .AP Tcl_DString *dstPtr out Pointer to an uninitialized or free \fBTcl_DString\fR in which the converted result will be stored. .AP int flags in -Various flag bits OR-ed together. +Various flag bits OR-ed together. \fBTCL_ENCODING_START\fR signifies that the source buffer is the first block in a (potentially multi-block) input stream, telling the conversion routine to reset to an initial state and @@ -102,7 +102,7 @@ byte is converted and then to reset to an initial state. \fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should return immediately upon reading a source character that does not exist in the target encoding; otherwise a default fallback character will -automatically be substituted. +automatically be substituted. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -110,7 +110,7 @@ state in \fI*statePtr\fR after \fIsrc\fR (the buffer containing the current piece) has been converted; that state information must be passed back when converting the next piece of the stream so the conversion routine knows what state it was in when it left off at the end of the -last piece. May be NULL, in which case the value specified for \fIflags\fR +last piece. May be NULL, in which case the value specified for \fIflags\fR is ignored and the source buffer is assumed to contain the complete string to convert. .AP char *dst out @@ -131,11 +131,11 @@ stored in the output buffer. May be NULL. .AP Tcl_DString *bufPtr out Storage for the prescribed system encoding name. .AP "const Tcl_EncodingType" *typePtr in -Structure that defines a new type of encoding. +Structure that defines a new type of encoding. .AP Tcl_Obj *searchPath in List of filesystem directories in which to search for encoding data files. .AP "const char" *path in -A path to the location of the encoding file. +A path to the location of the encoding file. .BE .SH INTRODUCTION .PP @@ -174,13 +174,13 @@ The first time \fIname\fR is seen, \fBTcl_GetEncoding\fR returns an encoding with a reference count of 1. If the same \fIname\fR is requested further times, then the reference count for that encoding is incremented without the overhead of allocating a new encoding and all its associated -data structures. +data structures. .PP When an \fIencoding\fR is no longer needed, \fBTcl_FreeEncoding\fR should be called to release it. When an \fIencoding\fR is no longer in use anywhere (i.e., it has been freed as many times as it has been gotten) \fBTcl_FreeEncoding\fR will release all storage the encoding was using -and delete it from the database. +and delete it from the database. .PP \fBTcl_GetEncodingFromObj\fR treats the string representation of \fIobjPtr\fR as an encoding name, and finds an encoding with that @@ -195,7 +195,7 @@ on the resulting encoding token when that token will no longer be used. .PP \fBTcl_ExternalToUtfDString\fR converts a source buffer \fIsrc\fR from the -specified \fIencoding\fR into UTF-8. The converted bytes are stored in +specified \fIencoding\fR into UTF-8. The converted bytes are stored in \fIdstPtr\fR, which is then null-terminated. The caller should eventually call \fBTcl_DStringFree\fR to free any information stored in \fIdstPtr\fR. When converting, if any of the characters in the source buffer cannot be @@ -221,17 +221,17 @@ sequence, but more bytes were needed to complete this sequence. A subsequent call to the conversion routine should pass a buffer containing the unconverted bytes that remained in \fIsrc\fR plus some further bytes from the source stream to properly convert the formerly split-up multibyte -sequence. +sequence. .IP \fBTCL_CONVERT_SYNTAX\fR 29 The source buffer contained an invalid character sequence. This may occur if the input stream has been damaged or if the input encoding method was misidentified. .IP \fBTCL_CONVERT_UNKNOWN\fR 29 The source buffer contained a character that could not be represented in -the target encoding and \fBTCL_ENCODING_STOPONERROR\fR was specified. +the target encoding and \fBTCL_ENCODING_STOPONERROR\fR was specified. .RE .LP -\fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 +\fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 into the specified \fIencoding\fR. The converted bytes are stored in \fIdstPtr\fR, which is then terminated with the appropriate encoding-specific null. The caller should eventually call \fBTcl_DStringFree\fR to free any @@ -261,7 +261,7 @@ Unicode encoding. .PP \fBTcl_GetEncodingName\fR is roughly the inverse of \fBTcl_GetEncoding\fR. Given an \fIencoding\fR, the return value is the \fIname\fR argument that -was used to create the encoding. The string returned by +was used to create the encoding. The string returned by \fBTcl_GetEncodingName\fR is only guaranteed to persist until the \fIencoding\fR is deleted. The caller must not modify this string. .PP @@ -300,9 +300,9 @@ reference count of 1. If an encoding with the specified \fIname\fR already exists, then its entry in the database is replaced with the new encoding; the token for the old encoding will remain valid and continue to behave as before, but users of the new token will now call the new -encoding procedures. +encoding procedures. .PP -The \fItypePtr\fR argument to \fBTcl_CreateEncoding\fR contains information +The \fItypePtr\fR argument to \fBTcl_CreateEncoding\fR contains information about the name of the encoding and the procedures that will be called to convert between this encoding and UTF-8. It is defined as follows: .PP @@ -314,7 +314,7 @@ typedef struct Tcl_EncodingType { Tcl_EncodingFreeProc *\fIfreeProc\fR; ClientData \fIclientData\fR; int \fInullSize\fR; -} \fBTcl_EncodingType\fR; +} \fBTcl_EncodingType\fR; .CE .PP The \fIencodingName\fR provides a string name for the encoding, by @@ -344,12 +344,12 @@ type \fBTcl_EncodingConvertProc\fR: .CS typedef int \fBTcl_EncodingConvertProc\fR( ClientData \fIclientData\fR, - const char *\fIsrc\fR, - int \fIsrcLen\fR, - int \fIflags\fR, + const char *\fIsrc\fR, + int \fIsrcLen\fR, + int \fIflags\fR, Tcl_EncodingState *\fIstatePtr\fR, - char *\fIdst\fR, - int \fIdstLen\fR, + char *\fIdst\fR, + int \fIdstLen\fR, int *\fIsrcReadPtr\fR, int *\fIdstWrotePtr\fR, int *\fIdstCharsPtr\fR); @@ -365,12 +365,12 @@ documented at the top, to \fBTcl_ExternalToUtf\fR or \fBTcl_UtfToExternal\fR, with the following exceptions. If the \fIsrcLen\fR argument to one of those high-level functions is negative, the value passed to the callback procedure will be the appropriate -encoding-specific string length of \fIsrc\fR. If any of the \fIsrcReadPtr\fR, +encoding-specific string length of \fIsrc\fR. If any of the \fIsrcReadPtr\fR, \fIdstWrotePtr\fR, or \fIdstCharsPtr\fR arguments to one of the high-level functions is NULL, the corresponding value passed to the callback procedure will be a non-NULL location. .PP -The callback procedure \fIfreeProc\fR, if non-NULL, should match the type +The callback procedure \fIfreeProc\fR, if non-NULL, should match the type \fBTcl_EncodingFreeProc\fR: .PP .CS @@ -380,11 +380,11 @@ typedef void \fBTcl_EncodingFreeProc\fR( .PP This \fIfreeProc\fR function is called when the encoding is deleted. The \fIclientData\fR parameter is the same as the \fIclientData\fR field -specified to \fBTcl_CreateEncoding\fR when the encoding was created. +specified to \fBTcl_CreateEncoding\fR when the encoding was created. .PP \fBTcl_GetEncodingSearchPath\fR and \fBTcl_SetEncodingSearchPath\fR are called to access and set the list of filesystem directories searched -for encoding data files. +for encoding data files. .PP The value returned by \fBTcl_GetEncodingSearchPath\fR is the value stored by the last successful call to @@ -408,7 +408,7 @@ encoding files that can be loaded using the same mechanism. These encoding files contain information about the tables and/or escape sequences used to map between an external encoding and Unicode. The external encoding may consist of single-byte, multi-byte, or double-byte -characters. +characters. .PP Each dynamically-loadable encoding is represented as a text file. The initial line of the file, beginning with a @@ -432,9 +432,9 @@ many Japanese computers. .IP "[4] \fBE\fR" An escape-sequence encoding, specifying that certain sequences of bytes do not represent characters, but commands that describe how following bytes -should be interpreted. +should be interpreted. .PP -The rest of the lines in the file depend on the type. +The rest of the lines in the file depend on the type. .PP Cases [1], [2], and [3] are collectively referred to as table-based encoding files. The lines in a table-based encoding file are in the same @@ -485,7 +485,7 @@ The third line of the file is three numbers. The first number is the fallback character (in base 16) to use when converting from UTF-8 to this encoding. The second number is a \fB1\fR if this file represents the encoding for a symbol font, or \fB0\fR otherwise. The last number (in base -10) is how many pages of data follow. +10) is how many pages of data follow. .PP Subsequent lines in the example above are pages that describe how to map from the encoding into 2-byte Unicode. The first line in a page identifies diff --git a/doc/Method.3 b/doc/Method.3 index 550b64a..225da00 100644 --- a/doc/Method.3 +++ b/doc/Method.3 @@ -67,7 +67,9 @@ The class to create the method in. The name of the method to create. Should not be NULL unless creating constructors or destructors. .AP int isPublic in -A boolean flag saying whether the method is to be exported. +A flag saying what the visibility of the method is. The only supported public +values of this flag are 0 for a non-exported method, and 1 for an exported +method. .AP Tcl_MethodType *methodTypePtr in A description of the type of the method to create, or the type of method to compare against. diff --git a/doc/Object.3 b/doc/Object.3 index 55451ab..bf80fe2 100644 --- a/doc/Object.3 +++ b/doc/Object.3 @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_Obj 3 8.5 Tcl "Tcl Library Procedures" .so man.macros .BS @@ -94,7 +94,7 @@ Also, most Tcl values are only read and never modified. This is especially true for procedure arguments, which can be shared between the caller and the called procedure. Assignment and argument binding is done by -simply assigning a pointer to the value. +simply assigning a pointer to the value. Reference counting is used to determine when it is safe to reclaim a value's storage. .PP diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index cca76c2..582ff4b 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -115,7 +115,7 @@ Used for error reporting and to look up a channel registered in it. The name of a local or network file. .AP "const char" *mode in Specifies how the file is to be accessed. May have any of the values -allowed for the \fImode\fR argument to the Tcl \fBopen\fR command. +allowed for the \fImode\fR argument to the Tcl \fBopen\fR command. .AP int permissions in POSIX-style permission flags such as 0644. If a new file is created, these permissions will be set on the created file. @@ -141,7 +141,7 @@ file descriptor, for Windows it is a HANDLE. OR-ed combination of \fBTCL_READABLE\fR and \fBTCL_WRITABLE\fR to indicate what operations are valid on \fIhandle\fR. .AP "const char" *channelName in -The name of the channel. +The name of the channel. .AP int *modePtr out Points at an integer variable that will receive an OR-ed combination of \fBTCL_READABLE\fR and \fBTCL_WRITABLE\fR denoting whether the channel is @@ -155,8 +155,8 @@ from a procedure such as \fBTcl_OpenFileChannel\fR. A pointer to a Tcl value in which to store the characters read from the channel. .AP int charsToRead in -The number of characters to read from the channel. If the channel's encoding -is \fBbinary\fR, this is equivalent to the number of bytes to read from the +The number of characters to read from the channel. If the channel's encoding +is \fBbinary\fR, this is equivalent to the number of bytes to read from the channel. .AP int appendFlag in If non-zero, data read from the channel will be appended to the value. @@ -169,7 +169,7 @@ be large enough to hold this many bytes. .AP Tcl_Obj *lineObjPtr in/out A pointer to a Tcl value in which to store the line read from the channel. The line read will be appended to the current value of the -value. +value. .AP Tcl_DString *lineRead in/out A pointer to a Tcl dynamic string in which to store the line read from the channel. Must have been initialized by the caller. The line read will be @@ -238,8 +238,8 @@ If an error occurs while opening the channel, \fBTcl_OpenFileChannel\fR returns NULL and records a POSIX error code that can be retrieved with \fBTcl_GetErrno\fR. In addition, if \fIinterp\fR is non-NULL, \fBTcl_OpenFileChannel\fR -leaves an error message in \fIinterp\fR's result after any error. -As of Tcl 8.4, the value-based API \fBTcl_FSOpenFileChannel\fR should +leaves an error message in \fIinterp\fR's result after any error. +As of Tcl 8.4, the value-based API \fBTcl_FSOpenFileChannel\fR should be used in preference to \fBTcl_OpenFileChannel\fR wherever possible. .PP The newly created channel is not registered in the supplied interpreter; to @@ -360,7 +360,7 @@ the standard channels (\fBstdout\fR, \fBstderr\fR, \fBstdin\fR), and will return Code not associated with a Tcl interpreter can call \fBTcl_DetachChannel\fR with \fIinterp\fR as NULL, to indicate to Tcl that it no longer holds a reference to that channel. If this is the last -reference to the channel, unlike \fBTcl_UnregisterChannel\fR, +reference to the channel, unlike \fBTcl_UnregisterChannel\fR, it will not be closed. .SH TCL_ISSTANDARDCHANNEL .PP @@ -368,7 +368,7 @@ it will not be closed. three standard channels, \fBstdin\fR, \fBstdout\fR or \fBstderr\fR. If so, it returns 1, otherwise 0. .PP -No attempt is made to check whether the given channel or the standard +No attempt is made to check whether the given channel or the standard channels are initialized or otherwise valid. .SH TCL_CLOSE .PP @@ -402,7 +402,7 @@ corresponding calls to \fBTcl_UnregisterChannel\fR. .SH "TCL_READCHARS AND TCL_READ" .PP \fBTcl_ReadChars\fR consumes bytes from \fIchannel\fR, converting the bytes -to UTF-8 based on the channel's encoding and storing the produced data in +to UTF-8 based on the channel's encoding and storing the produced data in \fIreadObjPtr\fR's string representation. The return value of \fBTcl_ReadChars\fR is the number of characters, up to \fIcharsToRead\fR, that were stored in \fIreadObjPtr\fR. If an error occurs while reading, the @@ -450,7 +450,7 @@ extensions. It consumes bytes from \fIchannel\fR and stores them in of \fBTcl_Read\fR is the number of bytes, up to \fIbytesToRead\fR, written in \fIreadBuf\fR. The buffer produced by \fBTcl_Read\fR is not null-terminated. Its contents are valid from the zeroth position up to and excluding the -position indicated by the return value. +position indicated by the return value. .PP \fBTcl_ReadRaw\fR is the same as \fBTcl_Read\fR but does not compensate for stacking. While \fBTcl_Read\fR (and the other functions @@ -507,7 +507,7 @@ to be null-terminated and it outputs everything up to the null. .PP Data queued for output may not appear on the output device immediately, due to internal buffering. If the data should appear immediately, call -\fBTcl_Flush\fR after the call to \fBTcl_WriteChars\fR, or set the +\fBTcl_Flush\fR after the call to \fBTcl_WriteChars\fR, or set the \fB\-buffering\fR option on the channel to \fBnone\fR. If you wish the data to appear as soon as a complete line is accepted for output, set the \fB\-buffering\fR option on the channel to \fBline\fR mode. @@ -525,7 +525,7 @@ channel. This is done even if the channel has no encoding. \fBTcl_WriteObj\fR is similar to \fBTcl_WriteChars\fR except it accepts a Tcl value whose contents will be output to the channel. The UTF-8 characters in \fIwriteObjPtr\fR's string representation are converted -to the channel's encoding and queued for output to \fIchannel\fR. +to the channel's encoding and queued for output to \fIchannel\fR. As a performance optimization, when writing to a channel with the encoding \fBbinary\fR, UTF-8 characters are not converted as they are written. Instead, the bytes in \fIwriteObjPtr\fR's internal representation as a diff --git a/doc/ParseCmd.3 b/doc/ParseCmd.3 index d34e52f..01b4065 100644 --- a/doc/ParseCmd.3 +++ b/doc/ParseCmd.3 @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_ParseCommand 3 8.3 Tcl "Tcl Library Procedures" .so man.macros .BS @@ -52,7 +52,7 @@ following \fIstart\fR up to the first null character. .AP int nested in Non-zero means that the script is part of a command substitution so an unquoted close bracket should be treated as a command terminator. If zero, -close brackets have no special meaning. +close brackets have no special meaning. .AP int append in Non-zero means that \fI*parsePtr\fR already contains valid tokens; the new tokens should be appended to those already present. Zero means that @@ -114,7 +114,7 @@ result, and no information is left at \fI*parsePtr\fR. enclosed in braces such as \fB{hello}\fR or \fB{string \et with \et tabs}\fR from the beginning of its argument \fIstart\fR. -The first character of \fIstart\fR must be \fB{\fR. +The first character of \fIstart\fR must be \fB{\fR. If the braced string was parsed successfully, \fBTcl_ParseBraces\fR returns \fBTCL_OK\fR, fills in the structure pointed to by \fIparsePtr\fR @@ -130,7 +130,7 @@ and no information is left at \fI*parsePtr\fR or \fI*termPtr\fR. \fBTcl_ParseQuotedString\fR parses a double-quoted string such as \fB"sum is [expr {$a+$b}]"\fR from the beginning of the argument \fIstart\fR. -The first character of \fIstart\fR must be \fB\N'34'\fR. +The first character of \fIstart\fR must be \fB\N'34'\fR. If the double-quoted string was parsed successfully, \fBTcl_ParseQuotedString\fR returns \fBTCL_OK\fR, fills in the structure pointed to by \fIparsePtr\fR @@ -146,7 +146,7 @@ and no information is left at \fI*parsePtr\fR or \fI*termPtr\fR. \fBTcl_ParseVarName\fR parses a Tcl variable reference such as \fB$abc\fR or \fB$x([expr {$index + 1}])\fR from the beginning of its \fIstart\fR argument. -The first character of \fIstart\fR must be \fB$\fR. +The first character of \fIstart\fR must be \fB$\fR. If a variable name was parsed successfully, \fBTcl_ParseVarName\fR returns \fBTCL_OK\fR and fills in the structure pointed to by \fIparsePtr\fR with information about the structure of the variable name @@ -180,7 +180,7 @@ a Tcl_Parse structure. The tokens typically consist of all the tokens in a word or all the tokens that make up the index for a reference to an array variable. \fBTcl_EvalTokensStandard\fR performs the substitutions requested by the tokens and concatenates the -resulting values. +resulting values. The return value from \fBTcl_EvalTokensStandard\fR is a Tcl completion code with one of the values \fBTCL_OK\fR, \fBTCL_ERROR\fR, \fBTCL_RETURN\fR, \fBTCL_BREAK\fR, or \fBTCL_CONTINUE\fR, or possibly @@ -228,7 +228,7 @@ character that terminates the last comment. If the command is not preceded by any comments, \fIcommentSize\fR is 0. \fBTcl_ParseCommand\fR also sets the \fIcommandStart\fR field to point to the first character of the first -word in the command (skipping any comments and leading space) and +word in the command (skipping any comments and leading space) and \fIcommandSize\fR gives the total number of bytes in the command, including the character pointed to by \fIcommandStart\fR up to and including the newline, close bracket, or semicolon character that @@ -336,7 +336,7 @@ just the \fBTCL_TOKEN_OPERATOR\fR token. Each operand is described by a \fBTCL_TOKEN_SUB_EXPR\fR token. Otherwise, the subexpression is a value described by one of the token types \fBTCL_TOKEN_WORD\fR, \fBTCL_TOKEN_TEXT\fR, -\fBTCL_TOKEN_BS\fR, \fBTCL_TOKEN_COMMAND\fR, +\fBTCL_TOKEN_BS\fR, \fBTCL_TOKEN_COMMAND\fR, \fBTCL_TOKEN_VARIABLE\fR, and \fBTCL_TOKEN_SUB_EXPR\fR. The \fInumComponents\fR field counts the total number of sub-tokens that make up the subexpression; @@ -375,7 +375,7 @@ is always 0. After \fBTcl_ParseCommand\fR returns, the first token pointed to by the \fItokenPtr\fR field of the Tcl_Parse structure always has type \fBTCL_TOKEN_WORD\fR or -\fBTCL_TOKEN_SIMPLE_WORD\fR or \fBTCL_TOKEN_EXPAND_WORD\fR. +\fBTCL_TOKEN_SIMPLE_WORD\fR or \fBTCL_TOKEN_EXPAND_WORD\fR. It is followed by the sub-tokens that must be concatenated to produce the value of that word. The next token is the \fBTCL_TOKEN_WORD\fR or \fBTCL_TOKEN_SIMPLE_WORD\fR diff --git a/doc/StringObj.3 b/doc/StringObj.3 index cf8f6d3..7042cc8 100644 --- a/doc/StringObj.3 +++ b/doc/StringObj.3 @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_StringObj 3 8.1 Tcl "Tcl Library Procedures" .so man.macros .BS @@ -308,7 +308,7 @@ sprintf(buf, format, ...); \fBTcl_NewStringObj\fR(buf, -1); .CE .PP -but with greater convenience and no need to +but with greater convenience and no need to determine \fBSOME_SUITABLE_LENGTH\fR. The formatting is done with the same core formatting engine used by \fBTcl_Format\fR. This means the set of supported conversion specifiers is that of the \fBformat\fR command and @@ -329,8 +329,8 @@ Tcl_Obj *objPtr = \fBTcl_ObjPrintf\fR("Value is %d", x); .PP If the value of \fIformat\fR contains internal inconsistencies or invalid specifier formats, the formatted string result produced by -\fBTcl_ObjPrintf\fR will be an error message describing the error. -It is impossible however to provide runtime protection against +\fBTcl_ObjPrintf\fR will be an error message describing the error. +It is impossible however to provide runtime protection against mismatches between the format and any subsequent arguments. Compile-time protection may be provided by some compilers. .PP diff --git a/doc/Thread.3 b/doc/Thread.3 index ac5f2ba..5966a71 100644 --- a/doc/Thread.3 +++ b/doc/Thread.3 @@ -4,7 +4,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Threads 3 "8.1" Tcl "Tcl Library Procedures" .so man.macros .BS @@ -79,9 +79,10 @@ waited upon into it. .SH INTRODUCTION Beginning with the 8.1 release, the Tcl core is thread safe, which allows you to incorporate Tcl into multithreaded applications without -customizing the Tcl core. To enable Tcl multithreading support, -you must include the \fB\-\|\-enable-threads\fR option to \fBconfigure\fR -when you configure and compile your Tcl core. +customizing the Tcl core. Starting with the 8.6 release, Tcl +multithreading support is on by default. To disable Tcl multithreading +support, you must include the \fB\-\|\-disable-threads\fR option to +\fBconfigure\fR when you configure and compile your Tcl core. .PP An important constraint of the Tcl threads implementation is that \fIonly the thread that created a Tcl interpreter can use that @@ -126,7 +127,7 @@ will cause a memory leak. .PP The \fBTcl_GetThreadData\fR call returns a pointer to a block of thread-private data. Its argument is a key that is shared by all threads -and a size for the block of storage. The storage is automatically +and a size for the block of storage. The storage is automatically allocated and initialized to all zeros the first time each thread asks for it. The storage is automatically deallocated by \fBTcl_FinalizeThread\fR. .SS "SYNCHRONIZATION AND COMMUNICATION" diff --git a/doc/ToUpper.3 b/doc/ToUpper.3 index 587e76b..b933e9c 100644 --- a/doc/ToUpper.3 +++ b/doc/ToUpper.3 @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Tcl_UtfToUpper 3 "8.1" Tcl "Tcl Library Procedures" .so man.macros .BS @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH Utf 3 "8.1" Tcl "Tcl Library Procedures" .so man.macros .BS @@ -48,7 +48,7 @@ int int \fBTcl_UtfCharComplete\fR(\fIsrc, length\fR) .sp -int +int \fBTcl_NumUtfChars\fR(\fIsrc, length\fR) .sp const char * @@ -109,7 +109,7 @@ Pointer to the beginning of a UTF-8 string. .AP int index in The index of a character (not byte) in the UTF-8 string. .AP int *readPtr out -If non-NULL, filled with the number of bytes in the backslash sequence, +If non-NULL, filled with the number of bytes in the backslash sequence, including the backslash character. .AP char *dst out Buffer in which the bytes represented by the backslash sequence are stored. @@ -142,7 +142,7 @@ end and dereference non-existent or random memory; if the source buffer is known to be null-terminated, this will not happen. If the input is not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and -0x00ff and return 1. +0x00ff and return 1. .PP \fBTcl_UniCharToUtfDString\fR converts the given Unicode string to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR. @@ -210,12 +210,12 @@ length is negative, all bytes up to the first null byte are used. \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is -considered part of the UTF-8 string. +considered part of the UTF-8 string. .PP \fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings. It returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR in the null-terminated UTF-8 string \fIsrc\fR. The null terminator is -considered part of the UTF-8 string. +considered part of the UTF-8 string. .PP Given \fIsrc\fR, a pointer to some location in a UTF-8 string, \fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the @@ -239,7 +239,7 @@ characters. Behavior is undefined if a negative \fIindex\fR is given. .PP \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must -contain at least \fIindex\fR characters. This is equivalent to calling +contain at least \fIindex\fR characters. This is equivalent to calling \fBTcl_UtfNext\fR \fIindex\fR times. If a negative \fIindex\fR is given, the return pointer points to the first character in the source string. .PP diff --git a/doc/library.n b/doc/library.n index 775b7d9..6f8f265 100644 --- a/doc/library.n +++ b/doc/library.n @@ -4,7 +4,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" +'\" .TH library n "8.0" Tcl "Tcl Built-In Commands" .so man.macros .BS @@ -19,7 +19,7 @@ auto_execok, auto_import, auto_load, auto_mkindex, auto_qualify, auto_reset, tcl \fBauto_qualify \fIcommand namespace\fR \fBauto_reset\fR \fBtcl_findLibrary \fIbasename version patch initScript enVarName varName\fR -\fBparray \fIarrayName\fR +\fBparray \fIarrayName\fR ?\fIpattern\fR? \fBtcl_endOfWord \fIstr start\fR \fBtcl_startOfNextWord \fIstr start\fR \fBtcl_startOfPreviousWord \fIstr start\fR @@ -139,7 +139,7 @@ as its first characters then it is assumed to be a procedure definition and the next word of the line is taken as the procedure's name. Procedure definitions that do not appear in this way (e.g.\ they -have spaces before the \fBproc\fR) will not be indexed. If your +have spaces before the \fBproc\fR) will not be indexed. If your script contains .QW dangerous code, such as global initialization @@ -178,7 +178,7 @@ performing the actual auto-loading of functions at runtime. This is a standard search procedure for use by extensions during their initialization. They call this procedure to look for their script library in several standard directories. -The last component of the name of the library directory is +The last component of the name of the library directory is normally \fIbasenameversion\fR (e.g., tk8.0), but it might be .QW library @@ -196,9 +196,11 @@ bin or bin/\fIarch\fR directory; relative to the executable file in the current build tree; relative to the executable file in a parallel build tree. .TP -\fBparray \fIarrayName\fR -Prints on standard output the names and values of all the elements -in the array \fIarrayName\fR. +\fBparray \fIarrayName\fR ?\fIpattern\fR? +Prints on standard output the names and values of all the elements in the +array \fIarrayName\fR, or just the names that match \fIpattern\fR (using the +matching rules of \fBstring match\fR) and their values if \fIpattern\fR is +given. \fIArrayName\fR must be an array accessible to the caller of \fBparray\fR. It may be either local or global. .TP @@ -283,7 +285,7 @@ a default value is used. .TP \fBenv(TCLLIBPATH)\fR If set, then it must contain a valid Tcl list giving directories to -search during auto-load operations. Directories must be specified in +search during auto-load operations. Directories must be specified in Tcl format, using .QW / as the path separator, regardless of platform. @@ -312,7 +314,7 @@ Unix, words are comprised of numbers, letters or underscores. .SH "SEE ALSO" env(n), info(n), re_syntax(n) .SH KEYWORDS -auto-exec, auto-load, library, unknown, word, whitespace +auto-exec, auto-load, library, unknown, word, whitespace '\"Local Variables: '\"mode: nroff '\"End: @@ -37,7 +37,7 @@ the \fBoo::object\fR class, which is not publicly visible by default: oo::class create c { method count {} { \fBmy\fR variable counter - print [incr counter] + puts [incr counter] } } c create o @@ -122,7 +122,7 @@ oo::class create theSubclass { } } theSubclass create obj -oo::define obj method example args { +oo::objdefine obj method example args { puts "per-object method, args = $args" \fBnext\fR x {*}$args y \fBnext\fR @@ -176,7 +176,7 @@ oo::class create cache { } oo::object create demo -oo::define demo { +oo::objdefine demo { mixin cache method compute {a b c} { after 3000 \fI;# Simulate deep thought\fR diff --git a/doc/regexp.n b/doc/regexp.n index 17bf564..5fc2895 100644 --- a/doc/regexp.n +++ b/doc/regexp.n @@ -51,7 +51,7 @@ the \fB(?x)\fR embedded option (see the \fBre_syntax\fR manual page). .TP 15 \fB\-indices\fR . -Changes what is stored in the \fIsubMatchVar\fRs. +Changes what is stored in the \fImatchVar\fR and \fIsubMatchVar\fRs. Instead of storing the matching characters from \fIstring\fR, each variable will contain a list of two decimal strings giving the indices diff --git a/generic/regc_color.c b/generic/regc_color.c index f5d6dfd..92e0aad 100644 --- a/generic/regc_color.c +++ b/generic/regc_color.c @@ -259,7 +259,7 @@ newcolor( return COLORLESS; /* too many colors */ } n = cm->ncds * 2; - if (n < MAX_COLOR + 1) { + if (n > MAX_COLOR + 1) { n = MAX_COLOR + 1; } if (cm->cd == cm->cdspace) { @@ -777,18 +777,19 @@ dumpcolors( } /* - * It's hard to do this more efficiently. + * Unfortunately, it's hard to do this next bit more efficiently. + * + * Spencer's original coding has the loop iterating from CHR_MIN + * to CHR_MAX, but that's utterly unusable for 32-bit chr, or + * even 16-bit. For debugging purposes it seems fine to print + * only chr codes up to 1000 or so. */ - for (c=CHR_MIN ; c<CHR_MAX ; c++) { + for (c=CHR_MIN ; c<1000 ; c++) { if (GETCOLOR(cm, c) == co) { dumpchr(c, f); } } - assert(c == CHR_MAX); - if (GETCOLOR(cm, c) == co) { - dumpchr(c, f); - } fprintf(f, "\n"); } } diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index 0247521..d450d3e 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -81,6 +81,7 @@ addchr( struct cvec *cv, /* character vector */ pchr c) /* character to add */ { + assert(cv->nchrs < cv->chrspace); cv->chrs[cv->nchrs++] = (chr)c; } diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 70a0246..6208481 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -1139,24 +1139,6 @@ newline(void) } /* - - ch - return the chr sequence for regc_locale.c's fake collating element ch - * This helps confine use of CHR to this source file. Beware that the caller - * knows how long the sequence is. - ^ #ifdef REG_DEBUG - ^ static const chr *ch(void); - ^ #endif - */ -#ifdef REG_DEBUG -static const chr * -ch(void) -{ - static const chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; - - return chstr; -} -#endif - -/* - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 6db6684..476592d 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -49,6 +49,7 @@ newnfa( nfa = (struct nfa *) MALLOC(sizeof(struct nfa)); if (nfa == NULL) { + ERR(REG_ESPACE); return NULL; } @@ -1651,13 +1652,16 @@ compact( narcs = 0; for (s = nfa->states; s != NULL; s = s->next) { nstates++; - narcs += 1 + s->nouts + 1; - /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ + narcs += s->nouts + 1; /* need one extra for endmarker */ } + cnfa->stflags = (char *) MALLOC(nstates * sizeof(char)); cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) { + if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL) { + if (cnfa->stflags != NULL) { + FREE(cnfa->stflags); + } if (cnfa->states != NULL) { FREE(cnfa->states); } @@ -1680,9 +1684,8 @@ compact( ca = cnfa->arcs; for (s = nfa->states; s != NULL; s = s->next) { assert((size_t) s->no < nstates); + cnfa->stflags[s->no] = 0; cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ - ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) { switch (a->type) { @@ -1716,9 +1719,9 @@ compact( */ for (a = nfa->pre->outs; a != NULL; a = a->outchain) { - cnfa->states[a->to->no]->co = 1; + cnfa->stflags[a->to->no] = CNFA_NOPROGRESS; } - cnfa->states[nfa->pre->no]->co = 1; + cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS; } /* @@ -1762,6 +1765,7 @@ freecnfa( { assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; + FREE(cnfa->stflags); FREE(cnfa->states); FREE(cnfa->arcs); } @@ -1984,7 +1988,7 @@ dumpcnfa( } fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) { - dumpcstate(st, cnfa->states[st], cnfa, f); + dumpcstate(st, cnfa, f); } fflush(f); #endif @@ -1997,25 +2001,24 @@ dumpcnfa( /* - dumpcstate - dump a compacted-NFA state in human-readable form - ^ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); + ^ static void dumpcstate(int, struct cnfa *, FILE *); */ static void dumpcstate( int st, - struct carc *ca, struct cnfa *cnfa, FILE *f) { - int i; + struct carc *ca; int pos; - fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); + fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : "."); pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) { - if (ca[i].co < cnfa->ncolors) { - fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); + for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++) { + if (ca->co < cnfa->ncolors) { + fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to); } else { - fprintf(f, "\t:%ld:->%d", (long) ca[i].co-cnfa->ncolors,ca[i].to); + fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to); } if (pos == 5) { fprintf(f, "\n"); @@ -2024,7 +2027,7 @@ dumpcstate( pos++; } } - if (i == 1 || pos != 1) { + if (ca == cnfa->states[st] || pos != 1) { fprintf(f, "\n"); } fflush(f); diff --git a/generic/regcomp.c b/generic/regcomp.c index 3cebd28e..7bd9483 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -83,9 +83,6 @@ static int lexdigits(struct vars *, int, int, int); static int brenext(struct vars *, pchr); static void skip(struct vars *); static chr newline(void); -#ifdef REG_DEBUG -static const chr *ch(void); -#endif static chr chrnamed(struct vars *, const chr *, const chr *, pchr); /* === regc_color.c === */ static void initcm(struct vars *, struct colormap *); @@ -165,7 +162,7 @@ static void dumparc(struct arc *, struct state *, FILE *); #endif static void dumpcnfa(struct cnfa *, FILE *); #ifdef REG_DEBUG -static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); +static void dumpcstate(int, struct cnfa *, FILE *); #endif /* === regc_cvec.c === */ static struct cvec *clearcvec(struct cvec *); @@ -210,7 +207,7 @@ struct vars { struct subre *tree; /* subexpression tree */ struct subre *treechain; /* all tree nodes allocated */ struct subre *treefree; /* any free tree nodes */ - int ntree; /* number of tree nodes */ + int ntree; /* number of tree nodes, plus one */ struct cvec *cv; /* interface cvec */ struct cvec *cv2; /* utility cvec */ struct subre *lacons; /* lookahead-constraint vector */ @@ -223,13 +220,13 @@ struct vars { #define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ #define VISERR(vv) ((vv)->err != 0)/* have we seen an error yet? */ #define ISERR() VISERR(v) -#define VERR(vv,e) \ - ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e))) +#define VERR(vv,e) ((vv)->nexttype = EOS, \ + (vv)->err = ((vv)->err ? (vv)->err : (e))) #define ERR(e) VERR(v, e) /* record an error */ #define NOERR() {if (ISERR()) return;} /* if error seen, return */ #define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ #define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ -#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ +#define INSIST(c, e) do { if (!(c)) ERR(e); } while (0) /* error if c false */ #define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ #define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) @@ -258,12 +255,14 @@ struct vars { ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND) /* static function list */ -static struct fns functions = { +static const struct fns functions = { rfree, /* regfree insides */ }; /* - compile - compile regular expression + * Note: on failure, no resources remain allocated, so regfree() + * need not be applied to re. ^ int compile(regex_t *, const chr *, size_t, int); */ int @@ -593,13 +592,15 @@ makesearch( break; } } + + /* + * We want to mark states as being in the list already by having non + * NULL tmp fields, but we can't just store the old slist value in tmp + * because that doesn't work for the first such state. Instead, the + * first list entry gets its own address in tmp. + */ if (b != NULL && s->tmp == NULL) { - /* - * Must be split if not already in the list (fixes bugs 505048, - * 230589, 840258, 504785). - */ - - s->tmp = slist; + s->tmp = (slist != NULL) ? slist : s; slist = s; } } @@ -620,7 +621,7 @@ makesearch( freearc(nfa, a); } } - s2 = s->tmp; + s2 = (s->tmp != s) ? s->tmp : NULL; s->tmp = NULL; /* clean up while we're at it */ } } @@ -982,6 +983,7 @@ parseqatom( NOERR(); assert(v->nextvalue > 0); atom = subre(v, 'b', BACKR, lp, rp); + NOERR(); subno = v->nextvalue; atom->subno = subno; EMPTYARC(lp, rp); /* temporarily, so there's something */ @@ -1102,11 +1104,17 @@ parseqatom( /* * Prepare a general-purpose state skeleton. * - * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] - * / / - * [lp] ----> [s2] ----bypass--------------------- + * In the no-backrefs case, we want this: + * + * [lp] ---> [s] ---prefix---> [begin] ---atom---> [end] ---rest---> [rp] + * + * where prefix is some repetitions of atom. In the general case we need + * + * [lp] ---> [s] ---iterator---> [s2] ---rest---> [rp] + * + * where the iterator wraps around [begin] ---atom---> [end] * - * where bypass is an empty, and prefix is some repetitions of atom + * We make the s state here for both cases; s2 is made below if needed */ s = newstate(v->nfa); /* first, new endpoints for the atom */ @@ -1117,11 +1125,9 @@ parseqatom( NOERR(); atom->begin = s; atom->end = s2; - s = newstate(v->nfa); /* and spots for prefix and bypass */ - s2 = newstate(v->nfa); + s = newstate(v->nfa); /* set up starting state */ NOERR(); EMPTYARC(lp, s); - EMPTYARC(lp, s2); NOERR(); /* @@ -1129,6 +1135,7 @@ parseqatom( */ t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); + NOERR(); t->left = atom; atomp = &t->left; @@ -1142,6 +1149,7 @@ parseqatom( assert(top->op == '=' && top->left == NULL && top->right == NULL); top->left = subre(v, '=', top->flags, top->begin, lp); + NOERR(); top->op = '.'; top->right = t; @@ -1166,27 +1174,8 @@ parseqatom( } /* - * It's quantifier time; first, turn x{0,...} into x{1,...}|empty - */ - - if (m == 0) { - EMPTYARC(s2, atom->end);/* the bypass */ - assert(PREF(qprefer) != 0); - f = COMBINE(qprefer, atom->flags); - t = subre(v, '|', f, lp, atom->end); - NOERR(); - t->left = atom; - t->right = subre(v, '|', PREF(f), s2, atom->end); - NOERR(); - t->right->left = subre(v, '=', 0, s2, atom->end); - NOERR(); - *atomp = t; - atomp = &t->left; - m = 1; - } - - /* - * Deal with the rest of the quantifier. + * It's quantifier time. If the atom is just a backref, we'll let it deal + * with quantifiers internally. */ if (atomtype == BACKREF) { @@ -1204,16 +1193,24 @@ parseqatom( atom->min = (short) m; atom->max = (short) n; atom->flags |= COMBINE(qprefer, atom->flags); + /* rest of branch can be strung starting from atom->end */ + s2 = atom->end; } else if (m == 1 && n == 1) { /* * No/vacuous quantifier: done. */ EMPTYARC(s, atom->begin); /* empty prefix */ - } else { + /* rest of branch can be strung starting from atom->end */ + s2 = atom->end; + } else if (m > 0 && !(atom->flags & BACKR)) { /* - * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second - * x + * If there's no backrefs involved, we can turn x{m,n} into + * x{m-1,n-1}x, with capturing parens in only the second x. This + * is valid because we only care about capturing matches from the + * final iteration of the quantifier. It's a win because we can + * implement the backref-free left side as a plain DFA node, since + * we don't really care where its submatches are. */ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); @@ -1226,6 +1223,24 @@ parseqatom( NOERR(); t->right = atom; *atomp = t; + /* rest of branch can be strung starting from atom->end */ + s2 = atom->end; + } else { + /* general case: need an iteration node */ + s2 = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, atom->end, s2); + NOERR(); + dupnfa(v->nfa, atom->begin, atom->end, s, s2); + repeat(v, s, s2, m, n); + f = COMBINE(qprefer, atom->flags); + t = subre(v, '*', f, s, s2); + NOERR(); + t->min = (short) m; + t->max = (short) n; + t->left = atom; + *atomp = t; + /* rest of branch is to be strung from iteration's end state */ } /* @@ -1234,10 +1249,10 @@ parseqatom( t = top->right; if (!(SEE('|') || SEE(stopper) || SEE(EOS))) { - t->right = parsebranch(v, stopper, type, atom->end, rp, 1); + t->right = parsebranch(v, stopper, type, s2, rp, 1); } else { - EMPTYARC(atom->end, rp); - t->right = subre(v, '=', 0, atom->end, rp); + EMPTYARC(s2, rp); + t->right = subre(v, '=', 0, s2, rp); } NOERR(); assert(SEE('|') || SEE(stopper) || SEE(EOS)); @@ -1304,6 +1319,8 @@ scannum( /* - repeat - replicate subNFA for quantifiers + * The sub-NFA strung from lp to rp is modified to represent m to n + * repetitions of its initial contents. * The duplication sequences used here are chosen carefully so that any * pointers starting out pointing into the subexpression end up pointing into * the last occurrence. (Note that it may not be strung between the same left @@ -1713,11 +1730,11 @@ subre( v->treechain = ret; } - assert(strchr("|.b(=", op) != NULL); + assert(strchr("=b|.*(", op) != NULL); ret->op = op; ret->flags = flags; - ret->retry = 0; + ret->id = 0; /* will be assigned later */ ret->subno = 0; ret->min = ret->max = 1; ret->left = NULL; @@ -1770,7 +1787,8 @@ freesrnode( } sr->flags = 0; - if (v != NULL) { + if (v != NULL && v->treechain != NULL) { + /* we're still parsing, maybe we can reuse the subre */ sr->left = v->treefree; v->treefree = sr; } else { @@ -1798,7 +1816,7 @@ optst( } /* - - numst - number tree nodes (assigning retry indexes) + - numst - number tree nodes (assigning "id" indexes) ^ static int numst(struct subre *, int); */ static int /* next number */ @@ -1811,7 +1829,7 @@ numst( assert(t != NULL); i = start; - t->retry = (short) i++; + t->id = (short) i++; if (t->left != NULL) { i = numst(t->left, i); } @@ -1823,6 +1841,19 @@ numst( /* - markst - mark tree nodes as INUSE + * Note: this is a great deal more subtle than it looks. During initial + * parsing of a regex, all subres are linked into the treechain list; + * discarded ones are also linked into the treefree list for possible reuse. + * After we are done creating all subres required for a regex, we run markst() + * then cleanst(), which results in discarding all subres not reachable from + * v->tree. We then clear v->treechain, indicating that subres must be found + * by descending from v->tree. This changes the behavior of freesubre(): it + * will henceforth FREE() unwanted subres rather than sticking them into the + * treefree list. (Doing that any earlier would result in dangling links in + * the treechain list.) This all means that freev() will clean up correctly + * if invoked before or after markst()+cleanst(); but it would not work if + * called partway through this state conversion, so we mustn't error out + * in or between these two functions. ^ static void markst(struct subre *); */ static void @@ -1929,24 +1960,26 @@ newlacon( struct state *end, int pos) { - struct subre *sub; int n; + struct subre *newlacons; + struct subre *sub; if (v->nlacons == 0) { - v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre)); n = 1; /* skip 0th */ - v->nlacons = 2; + newlacons = (struct subre *) MALLOC(2 * sizeof(struct subre)); } else { - v->lacons = (struct subre *) REALLOC(v->lacons, - (v->nlacons+1)*sizeof(struct subre)); - n = v->nlacons++; + n = v->nlacons; + newlacons = (struct subre *) REALLOC(v->lacons, + (n + 1) * sizeof(struct subre)); } - if (v->lacons == NULL) { + if (newlacons == NULL) { ERR(REG_ESPACE); return 0; } + v->lacons = newlacons; + v->nlacons = n + 1; sub = &v->lacons[n]; sub->begin = begin; sub->end = end; @@ -1994,18 +2027,20 @@ rfree( g = (struct guts *) re->re_guts; re->re_guts = NULL; re->re_fns = NULL; - g->magic = 0; - freecm(&g->cmap); - if (g->tree != NULL) { - freesubre(NULL, g->tree); - } - if (g->lacons != NULL) { - freelacons(g->lacons, g->nlacons); - } - if (!NULLCNFA(g->search)) { - freecnfa(&g->search); + if (g != NULL) { + g->magic = 0; + freecm(&g->cmap); + if (g->tree != NULL) { + freesubre(NULL, g->tree); + } + if (g->lacons != NULL) { + freelacons(g->lacons, g->nlacons); + } + if (!NULLCNFA(g->search)) { + freecnfa(&g->search); + } + FREE(g); } - FREE(g); } /* @@ -2037,7 +2072,7 @@ dump( fprintf(f, "\n\n\n========= DUMP ==========\n"); fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", - re->re_nsub, re->re_info, re->re_csize, g->ntree); + (int) re->re_nsub, re->re_info, re->re_csize, g->ntree); dumpcolors(&g->cmap, f); if (!NULLCNFA(g->search)) { @@ -2146,14 +2181,14 @@ stid( size_t bufsize) { /* - * Big enough for hex int or decimal t->retry? + * Big enough for hex int or decimal t->id? */ - if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) { + if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->id)*3 + 1) { return "unable"; } - if (t->retry != 0) { - sprintf(buf, "%d", t->retry); + if (t->id != 0) { + sprintf(buf, "%d", t->id); } else { sprintf(buf, "%p", t); } diff --git a/generic/rege_dfa.c b/generic/rege_dfa.c index 920ea6c..e5f22c4 100644 --- a/generic/rege_dfa.c +++ b/generic/rege_dfa.c @@ -84,7 +84,7 @@ longest( if (v->eflags®_FTRACE) { while (cp < realstop) { - FDEBUG(("+++ at c%d +++\n", css - d->ssets)); + FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets))); co = GETCOLOR(cm, *cp); FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); ss = css->outs[co]; @@ -118,7 +118,7 @@ longest( * Shutdown. */ - FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); + FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets))); if (cp == v->stop && stop == v->stop) { if (hitstopp != NULL) { *hitstopp = 1; @@ -213,7 +213,7 @@ shortest( if (v->eflags®_FTRACE) { while (cp < realmax) { - FDEBUG(("--- at c%d ---\n", css - d->ssets)); + FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets))); co = GETCOLOR(cm, *cp); FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); ss = css->outs[co]; @@ -516,14 +516,14 @@ miss( gotState = 0; for (i = 0; i < d->nstates; i++) { if (ISBSET(css->states, i)) { - for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) { + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { if (ca->co == co) { BSET(d->work, ca->to); gotState = 1; if (ca->to == cnfa->post) { isPost = 1; } - if (!cnfa->states[ca->to]->co) { + if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS)) { noProgress = 0; } FDEBUG(("%d -> %d\n", i, ca->to)); @@ -537,8 +537,8 @@ miss( doLAConstraints = 0; for (i = 0; i < d->nstates; i++) { if (ISBSET(d->work, i)) { - for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) { - if (ca->co <= cnfa->ncolors) { + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { + if (ca->co < cnfa->ncolors) { continue; /* NOTE CONTINUE */ } sawLAConstraints = 1; @@ -553,7 +553,7 @@ miss( if (ca->to == cnfa->post) { isPost = 1; } - if (!cnfa->states[ca->to]->co) { + if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS)) { noProgress = 0; } FDEBUG(("%d :> %d\n", i, ca->to)); @@ -572,7 +572,7 @@ miss( for (p = d->ssets, i = d->nssused; i > 0; p++, i--) { if (HIT(h, d->work, p, d->wordsper)) { - FDEBUG(("cached c%d\n", p - d->ssets)); + FDEBUG(("cached c%d\n", (int) (p - d->ssets))); break; /* NOTE BREAK OUT */ } } @@ -594,7 +594,8 @@ miss( } if (!sawLAConstraints) { /* lookahead conds. always cache miss */ - FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); + FDEBUG(("c%d[%d]->c%d\n", + (int) (css - d->ssets), co, (int) (p - d->ssets))); css->outs[co] = p; css->inchain[co] = p->ins; p->ins.ss = css; @@ -663,7 +664,7 @@ getVacantSS( ap = ss->ins; while ((p = ap.ss) != NULL) { co = ap.co; - FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); + FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long)co)); p->outs[co] = NULL; ap = p->inchain[co]; p->inchain[co].ss = NULL; /* paranoia */ @@ -680,7 +681,7 @@ getVacantSS( if (p == NULL) { continue; /* NOTE CONTINUE */ } - FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); + FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets))); if (p->ins.ss == ss && p->ins.co == i) { p->ins = ss->inchain[i]; } else { @@ -772,7 +773,7 @@ pickNextSS( if ((ss->lastseen == NULL || ss->lastseen < ancient) && !(ss->flags&LOCKED)) { d->search = ss + 1; - FDEBUG(("replacing c%d\n", ss - d->ssets)); + FDEBUG(("replacing c%d\n", (int) (ss - d->ssets))); return ss; } } @@ -780,7 +781,7 @@ pickNextSS( if ((ss->lastseen == NULL || ss->lastseen < ancient) && !(ss->flags&LOCKED)) { d->search = ss + 1; - FDEBUG(("replacing c%d\n", ss - d->ssets)); + FDEBUG(("replacing c%d\n", (int) (ss - d->ssets))); return ss; } } diff --git a/generic/regerror.c b/generic/regerror.c index a1a0163..49d93ed 100644 --- a/generic/regerror.c +++ b/generic/regerror.c @@ -41,7 +41,7 @@ static const char unk[] = "*** unknown regex error code 0x%x ***"; * Struct to map among codes, code names, and explanations. */ -static struct rerr { +static const struct rerr { int code; const char *name; const char *explain; @@ -62,7 +62,7 @@ regerror( char *errbuf, /* Result buffer (unless errbuf_size==0) */ size_t errbuf_size) /* Available space in errbuf, can be 0 */ { - struct rerr *r; + const struct rerr *r; const char *msg; char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */ size_t len; diff --git a/generic/regexec.c b/generic/regexec.c index 7ca523b..dfecf65 100644 --- a/generic/regexec.c +++ b/generic/regexec.c @@ -107,13 +107,13 @@ struct vars { chr *start; /* start of string */ chr *stop; /* just past end of string */ int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ + struct dfa **subdfas; /* per-subre DFAs */ struct smalldfa dfa1; struct smalldfa dfa2; }; #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ #define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) +#define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e))) #define ERR(e) VERR(v, e) /* record an error */ #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ #define OFF(p) ((p) - v->start) @@ -126,21 +126,20 @@ struct vars { /* automatically gathered by fwd; do not hand-edit */ /* === regexec.c === */ int exec(regex_t *, const chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int); +static struct dfa *getsubdfa(struct vars *, struct subre *); static int simpleFind(struct vars *const, struct cnfa *const, struct colormap *const); static int complicatedFind(struct vars *const, struct cnfa *const, struct colormap *const); static int complicatedFindLoop(struct vars *const, struct cnfa *const, struct colormap *const, struct dfa *const, struct dfa *const, chr **const); -static void zapSubexpressions(regmatch_t *const, const size_t); -static void zapSubtree(struct vars *const, struct subre *const); +static void zapallsubs(regmatch_t *const, const size_t); +static void zaptreesubs(struct vars *const, struct subre *const); static void subset(struct vars *const, struct subre *const, chr *const, chr *const); -static int dissect(struct vars *const, struct subre *, chr *const, chr *const); -static int concatenationDissect(struct vars *const, struct subre *const, chr *const, chr *const); -static int alternationDissect(struct vars *const, struct subre *, chr *const, chr *const); -static inline int complicatedDissect(struct vars *const, struct subre *const, chr *const, chr *const); -static int complicatedCapturingDissect(struct vars *const, struct subre *const, chr *const, chr *const); -static int complicatedConcatenationDissect(struct vars *const, struct subre *const, chr *const, chr *const); -static int complicatedReversedDissect(struct vars *const, struct subre *const, chr *const, chr *const); -static int complicatedBackrefDissect(struct vars *const, struct subre *const, chr *const, chr *const); -static int complicatedAlternationDissect(struct vars *const, struct subre *, chr *const, chr *const); +static int cdissect(struct vars *, struct subre *, chr *, chr *); +static int ccondissect(struct vars *, struct subre *, chr *, chr *); +static int crevcondissect(struct vars *, struct subre *, chr *, chr *); +static int cbrdissect(struct vars *, struct subre *, chr *, chr *); +static int caltdissect(struct vars *, struct subre *, chr *, chr *); +static int citerdissect(struct vars *, struct subre *, chr *, chr *); +static int creviterdissect(struct vars *, struct subre *, chr *, chr *); /* === rege_dfa.c === */ static chr *longest(struct vars *const, struct dfa *const, chr *const, chr *const, int *const); static chr *shortest(struct vars *const, struct dfa *const, chr *const, chr *const, chr *const, chr **const, int *const); @@ -174,10 +173,11 @@ exec( AllocVars(v); int st, backref; size_t n; + size_t i; #define LOCALMAT 20 regmatch_t mat[LOCALMAT]; -#define LOCALMEM 40 - regoff_t mem[LOCALMEM]; +#define LOCALDFAS 40 + struct dfa *subdfas[LOCALDFAS]; /* * Sanity checks. @@ -235,28 +235,20 @@ exec( v->start = (chr *)string; v->stop = (chr *)string + len; v->err = 0; - if (backref) { - /* - * Need retry memory. - */ - - assert(v->g->ntree >= 0); - n = (size_t)v->g->ntree; - if (n <= LOCALMEM) { - v->mem = mem; - } else { - v->mem = (regoff_t *) MALLOC(n*sizeof(regoff_t)); - } - if (v->mem == NULL) { - if (v->pmatch != pmatch && v->pmatch != mat) { - FREE(v->pmatch); - } - FreeVars(v); - return REG_ESPACE; - } - } else { - v->mem = NULL; + assert(v->g->ntree >= 0); + n = (size_t) v->g->ntree; + if (n <= LOCALDFAS) + v->subdfas = subdfas; + else + v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); + if (v->subdfas == NULL) { + if (v->pmatch != pmatch && v->pmatch != mat) + FREE(v->pmatch); + FreeVars(v); + return REG_ESPACE; } + for (i = 0; i < n; i++) + v->subdfas[i] = NULL; /* * Do it. @@ -274,7 +266,7 @@ exec( */ if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { - zapSubexpressions(pmatch, nmatch); + zapallsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ? nmatch : v->nmatch; memcpy((void*)(pmatch), (void*)(v->pmatch), n*sizeof(regmatch_t)); } @@ -286,14 +278,35 @@ exec( if (v->pmatch != pmatch && v->pmatch != mat) { FREE(v->pmatch); } - if (v->mem != NULL && v->mem != mem) { - FREE(v->mem); + n = (size_t) v->g->ntree; + for (i = 0; i < n; i++) { + if (v->subdfas[i] != NULL) + freeDFA(v->subdfas[i]); } + if (v->subdfas != subdfas) + FREE(v->subdfas); FreeVars(v); return st; } /* + - getsubdfa - create or re-fetch the DFA for a subre node + * We only need to create the DFA once per overall regex execution. + * The DFA will be freed by the cleanup step in exec(). + */ +static struct dfa * +getsubdfa(struct vars * v, + struct subre * t) +{ + if (v->subdfas[t->id] == NULL) { + v->subdfas[t->id] = newDFA(v, &t->cnfa, &v->g->cmap, DOMALLOC); + if (ISERR()) + return NULL; + } + return v->subdfas[t->id]; +} + +/* - simpleFind - find a match for the main NFA (no-complications case) ^ static int simpleFind(struct vars *, struct cnfa *, struct colormap *); */ @@ -357,7 +370,10 @@ simpleFind( } else { end = longest(v, d, begin, v->stop, &hitend); } - NOERR(); + if (ISERR()) { + freeDFA(d); + return v->err; + } if (hitend && cold == NULL) { cold = begin; } @@ -388,11 +404,11 @@ simpleFind( } /* - * Submatches. + * Find submatches. */ - zapSubexpressions(v->pmatch, v->nmatch); - return dissect(v, v->g->tree, begin, end); + zapallsubs(v->pmatch, v->nmatch); + return cdissect(v, v->g->tree, begin, end); } /* @@ -488,9 +504,8 @@ complicatedFindLoop( } MDEBUG(("tentative end %ld\n", LOFF(end))); - zapSubexpressions(v->pmatch, v->nmatch); - zapSubtree(v, v->g->tree); - er = complicatedDissect(v, v->g->tree, begin, end); + zapallsubs(v->pmatch, v->nmatch); + er = cdissect(v, v->g->tree, begin, end); if (er == REG_OKAY) { if (v->nmatch > 0) { v->pmatch[0].rm_so = OFF(begin); @@ -501,6 +516,7 @@ complicatedFindLoop( } if (er != REG_NOMATCH) { ERR(er); + *coldp = cold; return er; } if ((shorter) ? end == estop : end == begin) { @@ -525,11 +541,11 @@ complicatedFindLoop( } /* - - zapSubexpressions - initialize the subexpression matches to "no match" - ^ static void zapSubexpressions(regmatch_t *, size_t); + - zapallsubs - initialize all subexpression matches to "no match" + ^ static void zapallsubs(regmatch_t *, size_t); */ static void -zapSubexpressions( +zapallsubs( regmatch_t *const p, const size_t n) { @@ -542,36 +558,33 @@ zapSubexpressions( } /* - - zapSubtree - initialize the retry memory of a subtree to zeros - ^ static void zapSubtree(struct vars *, struct subre *); + - zaptreesubs - initialize subexpressions within subtree to "no match" + ^ static void zaptreesubs(struct vars *, struct subre *); */ static void -zapSubtree( +zaptreesubs( struct vars *const v, struct subre *const t) { - if (t == NULL) { - return; - } - - assert(v->mem != NULL); - v->mem[t->retry] = 0; if (t->op == '(') { - assert(t->subno > 0); - v->pmatch[t->subno].rm_so = -1; - v->pmatch[t->subno].rm_eo = -1; + int n = t->subno; + assert(n > 0); + if ((size_t) n < v->nmatch) { + v->pmatch[n].rm_so = -1; + v->pmatch[n].rm_eo = -1; + } } if (t->left != NULL) { - zapSubtree(v, t->left); + zaptreesubs(v, t->left); } if (t->right != NULL) { - zapSubtree(v, t->right); + zaptreesubs(v, t->right); } } /* - - subset - set any subexpression relevant to a successful subre + - subset - set subexpression match data for a successful subre ^ static void subset(struct vars *, struct subre *, chr *, chr *); */ static void @@ -594,243 +607,87 @@ subset( } /* - - dissect - determine subexpression matches (uncomplicated case) - ^ static int dissect(struct vars *, struct subre *, chr *, chr *); + - cdissect - check backrefs and determine subexpression matches + * cdissect recursively processes a subre tree to check matching of backrefs + * and/or identify submatch boundaries for capture nodes. The proposed match + * runs from "begin" to "end" (not including "end"), and we are basically + * "dissecting" it to see where the submatches are. + * Before calling any level of cdissect, the caller must have run the node's + * DFA and found that the proposed substring satisfies the DFA. (We make + * the caller do that because in concatenation and iteration nodes, it's + * much faster to check all the substrings against the child DFAs before we + * recurse.) Also, caller must have cleared subexpression match data via + * zaptreesubs (or zapallsubs at the top level). + ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ -dissect( - struct vars *const v, - struct subre *t, - chr *const begin, /* beginning of relevant substring */ - chr *const end) /* end of same */ -{ -#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION - restart: -#endif - assert(t != NULL); - MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - case '|': /* alternation */ - assert(t->left != NULL); - return alternationDissect(v, t, begin, end); - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return concatenationDissect(v, t, begin, end); - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); -#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION - t = t->left; - goto restart; -#else - return dissect(v, t->left, begin, end); -#endif - default: - return REG_ASSERT; - } -} - -/* - - concatenationDissect - determine concatenation subexpression matches - - (uncomplicated) - ^ static int concatenationDissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -concatenationDissect( +cdissect( struct vars *const v, struct subre *const t, chr *const begin, /* beginning of relevant substring */ chr *const end) /* end of same */ { - struct dfa *d, *d2; - chr *mid; - int i; - int shorter = (t->left->flags&SHORTER) ? 1 : 0; - chr *stop = (shorter) ? end : begin; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - - d = newDFA(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - NOERR(); - d2 = newDFA(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) { - assert(d2 == NULL); - freeDFA(d); - return v->err; - } - - /* - * Pick a tentative midpoint. - */ - - if (shorter) { - mid = shortest(v, d, begin, begin, end, NULL, NULL); - } else { - mid = longest(v, d, begin, end, NULL); - } - if (mid == NULL) { - freeDFA(d); - freeDFA(d2); - return REG_ASSERT; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - - /* - * Iterate until satisfaction or failure. - */ - - while (longest(v, d2, mid, end, NULL) != end) { - /* - * That midpoint didn't work, find a new one. - */ - - if (mid == stop) { - /* - * All possibilities exhausted! - */ - - MDEBUG(("no midpoint!\n")); - freeDFA(d); - freeDFA(d2); - return REG_ASSERT; - } - if (shorter) { - mid = shortest(v, d, begin, mid+1, end, NULL, NULL); - } else { - mid = longest(v, d, begin, mid-1, NULL); - } - if (mid == NULL) { - /* - * Failed to find a new one! - */ - - MDEBUG(("failed midpoint!\n")); - freeDFA(d); - freeDFA(d2); - return REG_ASSERT; - } - MDEBUG(("new midpoint %ld\n", LOFF(mid))); - } - - /* - * Satisfaction. - */ - - MDEBUG(("successful\n")); - freeDFA(d); - freeDFA(d2); - i = dissect(v, t->left, begin, mid); - if (i != REG_OKAY) { - return i; - } - return dissect(v, t->right, mid, end); -} - -/* - - alternationDissect - determine alternative subexpression matches (uncomplicated) - ^ static int alternationDissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -alternationDissect( - struct vars *const v, - struct subre *t, - chr *const begin, /* beginning of relevant substring */ - chr *const end) /* end of same */ -{ - int i; - - assert(t != NULL); - assert(t->op == '|'); - - for (i = 0; t != NULL; t = t->right, i++) { - struct dfa *d; + int er; - MDEBUG(("trying %dth\n", i)); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - d = newDFA(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - if (ISERR()) { - return v->err; - } - if (longest(v, d, begin, end, NULL) == end) { - MDEBUG(("success\n")); - freeDFA(d); - return dissect(v, t->left, begin, end); - } - freeDFA(d); - } - return REG_ASSERT; /* none of them matched?!? */ -} - -/* - - complicatedDissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, plus 1 - * so that 0 uniquely means "clean slate". - ^ static int complicatedDissect(struct vars *, struct subre *, chr *, chr *); - */ -static inline int /* regexec return code */ -complicatedDissect( - struct vars *const v, - struct subre *const t, - chr *const begin, /* beginning of relevant substring */ - chr *const end) /* end of same */ -{ assert(t != NULL); - MDEBUG(("complicatedDissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); + MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); switch (t->op) { case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - case '|': /* alternation */ - assert(t->left != NULL); - return complicatedAlternationDissect(v, t, begin, end); - case 'b': /* back ref -- shouldn't be calling us! */ + er = REG_OKAY; /* no action, parent did the work */ + break; + case 'b': /* back reference */ assert(t->left == NULL && t->right == NULL); - return complicatedBackrefDissect(v, t, begin, end); + er = cbrdissect(v, t, begin, end); + break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); - return complicatedConcatenationDissect(v, t, begin, end); + if (t->left->flags & SHORTER) /* reverse scan */ + er = crevcondissect(v, t, begin, end); + else + er = ccondissect(v, t, begin, end); + break; + case '|': /* alternation */ + assert(t->left != NULL); + er = caltdissect(v, t, begin, end); + break; + case '*': /* iteration */ + assert(t->left != NULL); + if (t->left->flags & SHORTER) /* reverse scan */ + er = creviterdissect(v, t, begin, end); + else + er = citerdissect(v, t, begin, end); + break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); - return complicatedCapturingDissect(v, t, begin, end); + er = cdissect(v, t->left, begin, end); + if (er == REG_OKAY) { + subset(v, t, begin, end); + } + break; default: - return REG_ASSERT; + er = REG_ASSERT; + break; } -} -static int /* regexec return code */ -complicatedCapturingDissect( - struct vars *const v, - struct subre *const t, - chr *const begin, /* beginning of relevant substring */ - chr *const end) /* end of same */ -{ - int er = complicatedDissect(v, t->left, begin, end); + /* + * We should never have a match failure unless backrefs lurk below; + * otherwise, either caller failed to check the DFA, or there's some + * inconsistency between the DFA and the node's innards. + */ + assert(er != REG_NOMATCH || (t->flags & BACKR)); - if (er == REG_OKAY) { - subset(v, t, begin, end); - } return er; } /* - - complicatedConcatenationDissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, plus 1 - * so that 0 uniquely means "clean slate". - ^ static int complicatedConcatenationDissect(struct vars *, struct subre *, chr *, chr *); + - ccondissect - dissect match for concatenation node + ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ -complicatedConcatenationDissect( +ccondissect( struct vars *const v, struct subre *const t, chr *const begin, /* beginning of relevant substring */ @@ -842,39 +699,23 @@ complicatedConcatenationDissect( assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); + assert(!(t->left->flags & SHORTER)); - if (t->left->flags&SHORTER) { /* reverse scan */ - return complicatedReversedDissect(v, t, begin, end); - } + d = getsubdfa(v, t->left); + NOERR(); + d2 = getsubdfa(v, t->right); + NOERR(); - d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { - return v->err; - } - d2 = newDFA(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { - freeDFA(d); - return v->err; - } - MDEBUG(("cConcat %d\n", t->retry)); + MDEBUG(("cConcat %d\n", t->id)); /* * Pick a tentative midpoint. */ - - if (v->mem[t->retry] == 0) { - mid = longest(v, d, begin, end, NULL); - if (mid == NULL) { - freeDFA(d); - freeDFA(d2); - return REG_NOMATCH; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[t->retry] - 1); - MDEBUG(("working midpoint %ld\n", LOFF(mid))); + mid = longest(v, d, begin, end, (int *) NULL); + if (mid == NULL) { + return REG_NOMATCH; } + MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* * Iterate until satisfaction or failure. @@ -886,24 +727,20 @@ complicatedConcatenationDissect( */ if (longest(v, d2, mid, end, NULL) == end) { - int er = complicatedDissect(v, t->left, begin, mid); + int er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY) { - er = complicatedDissect(v, t->right, mid, end); + er = cdissect(v, t->right, mid, end); if (er == REG_OKAY) { /* * Satisfaction. */ MDEBUG(("successful\n")); - freeDFA(d); - freeDFA(d2); return REG_OKAY; } } - if ((er != REG_OKAY) && (er != REG_NOMATCH)) { - freeDFA(d); - freeDFA(d2); + if (er != REG_NOMATCH) { return er; } } @@ -917,9 +754,7 @@ complicatedConcatenationDissect( * All possibilities exhausted. */ - MDEBUG(("%d no midpoint\n", t->retry)); - freeDFA(d); - freeDFA(d2); + MDEBUG(("%d no midpoint\n", t->id)); return REG_NOMATCH; } mid = longest(v, d, begin, mid-1, NULL); @@ -928,27 +763,21 @@ complicatedConcatenationDissect( * Failed to find a new one. */ - MDEBUG(("%d failed midpoint\n", t->retry)); - freeDFA(d); - freeDFA(d2); + MDEBUG(("%d failed midpoint\n", t->id)); return REG_NOMATCH; } - MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - zapSubtree(v, t->left); - zapSubtree(v, t->right); + MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); + zaptreesubs(v, t->left); + zaptreesubs(v, t->right); } } /* - - complicatedReversedDissect - determine backref shortest-first subexpression - - matches - * The retry memory stores the offset of the trial midpoint from begin, plus 1 - * so that 0 uniquely means "clean slate". - ^ static int complicatedReversedDissect(struct vars *, struct subre *, chr *, chr *); + - crevcondissect - dissect match for concatenation node, shortest-first + ^ static int crevcondissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ -complicatedReversedDissect( +crevcondissect( struct vars *const v, struct subre *const t, chr *const begin, /* beginning of relevant substring */ @@ -962,38 +791,22 @@ complicatedReversedDissect( assert(t->right != NULL && t->right->cnfa.nstates > 0); assert(t->left->flags&SHORTER); - /* - * Concatenation -- need to split the substring between parts. - */ + d = getsubdfa(v, t->left); + NOERR(); + d2 = getsubdfa(v, t->right); + NOERR(); - d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { - return v->err; - } - d2 = newDFA(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { - freeDFA(d); - return v->err; - } - MDEBUG(("cRev %d\n", t->retry)); + MDEBUG(("crevcon %d\n", t->id)); /* * Pick a tentative midpoint. */ - if (v->mem[t->retry] == 0) { - mid = shortest(v, d, begin, begin, end, NULL, NULL); - if (mid == NULL) { - freeDFA(d); - freeDFA(d2); - return REG_NOMATCH; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[t->retry] - 1); - MDEBUG(("working midpoint %ld\n", LOFF(mid))); + mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); + if (mid == NULL) { + return REG_NOMATCH; } + MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* * Iterate until satisfaction or failure. @@ -1005,24 +818,20 @@ complicatedReversedDissect( */ if (longest(v, d2, mid, end, NULL) == end) { - int er = complicatedDissect(v, t->left, begin, mid); + int er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY) { - er = complicatedDissect(v, t->right, mid, end); + er = cdissect(v, t->right, mid, end); if (er == REG_OKAY) { /* * Satisfaction. */ MDEBUG(("successful\n")); - freeDFA(d); - freeDFA(d2); return REG_OKAY; } } - if (er != REG_OKAY && er != REG_NOMATCH) { - freeDFA(d); - freeDFA(d2); + if (er != REG_NOMATCH) { return er; } } @@ -1036,9 +845,7 @@ complicatedReversedDissect( * All possibilities exhausted. */ - MDEBUG(("%d no midpoint\n", t->retry)); - freeDFA(d); - freeDFA(d2); + MDEBUG(("%d no midpoint\n", t->id)); return REG_NOMATCH; } mid = shortest(v, d, begin, mid+1, end, NULL, NULL); @@ -1047,164 +854,474 @@ complicatedReversedDissect( * Failed to find a new one. */ - MDEBUG(("%d failed midpoint\n", t->retry)); - freeDFA(d); - freeDFA(d2); + MDEBUG(("%d failed midpoint\n", t->id)); return REG_NOMATCH; } - MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - zapSubtree(v, t->left); - zapSubtree(v, t->right); + MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); + zaptreesubs(v, t->left); + zaptreesubs(v, t->right); } } /* - - complicatedBackrefDissect - determine backref subexpression matches - ^ static int complicatedBackrefDissect(struct vars *, struct subre *, chr *, chr *); + - cbrdissect - dissect match for backref node + ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ -complicatedBackrefDissect( +cbrdissect( struct vars *const v, struct subre *const t, chr *const begin, /* beginning of relevant substring */ chr *const end) /* end of same */ { - int i, n = t->subno, min = t->min, max = t->max; - chr *paren, *p, *stop; - size_t len; + int n = t->subno, min = t->min, max = t->max; + size_t numreps; + size_t tlen; + size_t brlen; + chr *brstring; + chr *p; assert(t != NULL); assert(t->op == 'b'); assert(n >= 0); assert((size_t)n < v->nmatch); - MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); + MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max)); + /* get the backreferenced string */ if (v->pmatch[n].rm_so == -1) { return REG_NOMATCH; } - paren = v->start + v->pmatch[n].rm_so; - len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; - - /* - * No room to maneuver -- retries are pointless. - */ + brstring = v->start + v->pmatch[n].rm_so; + brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; - if (v->mem[t->retry]) { + /* special cases for zero-length strings */ + if (brlen == 0) { + /* + * matches only if target is zero length, but any number of + * repetitions can be considered to be present + */ + if (begin == end && min <= max) { + MDEBUG(("cbackref matched trivially\n")); + return REG_OKAY; + } return REG_NOMATCH; } - v->mem[t->retry] = 1; - - /* - * Special-case zero-length string. - */ - - if (len == 0) { - if (begin == end) { + if (begin == end) { + /* matches only if zero repetitions are okay */ + if (min == 0) { + MDEBUG(("cbackref matched trivially\n")); return REG_OKAY; } return REG_NOMATCH; } /* - * And too-short string. + * check target length to see if it could possibly be an allowed number of + * repetitions of brstring */ - assert(end >= begin); - if ((size_t)(end - begin) < len) { + assert(end > begin); + tlen = end - begin; + if (tlen % brlen != 0) + return REG_NOMATCH; + numreps = tlen / brlen; + if (numreps < min || (numreps > max && max != DUPINF)) return REG_NOMATCH; - } - stop = end - len; - - /* - * Count occurrences. - */ - i = 0; - for (p = begin; p <= stop && (i < max || max == DUPINF); p += len) { - if (v->g->compare(paren, p, len) != 0) { - break; - } - i++; + /* okay, compare the actual string contents */ + p = begin; + while (numreps-- > 0) { + if ((*v->g->compare) (brstring, p, brlen) != 0) + return REG_NOMATCH; + p += brlen; } - MDEBUG(("cbackref found %d\n", i)); - - /* - * And sort it out. - */ - if (p != end) { /* didn't consume all of it */ - return REG_NOMATCH; - } - if (min <= i && (i <= max || max == DUPINF)) { - return REG_OKAY; - } - return REG_NOMATCH; /* out of range */ + MDEBUG(("cbackref matched\n")); + return REG_OKAY; } /* - - complicatedAlternationDissect - determine alternative subexpression matches (w. - - complications) - ^ static int complicatedAlternationDissect(struct vars *, struct subre *, chr *, chr *); + - caltdissect - dissect match for alternation node + ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); */ static int /* regexec return code */ -complicatedAlternationDissect( +caltdissect( struct vars *const v, struct subre *t, chr *const begin, /* beginning of relevant substring */ chr *const end) /* end of same */ { + struct dfa *d; int er; -#define UNTRIED 0 /* not yet tried at all */ -#define TRYING 1 /* top matched, trying submatches */ -#define TRIED 2 /* top didn't match or submatches exhausted */ -#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION - if (0) { - doRight: + /* We loop, rather than tail-recurse, to handle a chain of alternatives */ + while (t != NULL) { + assert(t->op == '|'); + assert(t->left != NULL && t->left->cnfa.nstates > 0); + + MDEBUG(("calt n%d\n", t->id)); + + d = getsubdfa(v, t->left); + NOERR(); + if (longest(v, d, begin, end, (int *) NULL) == end) { + MDEBUG(("calt matched\n")); + er = cdissect(v, t->left, begin, end); + if (er != REG_NOMATCH) { + return er; + } + } + t = t->right; } -#endif - if (t == NULL) { - return REG_NOMATCH; + + return REG_NOMATCH; +} + +/* + - citerdissect - dissect match for iteration node + ^ static int citerdissect(struct vars *, struct subre *, chr *, chr *); + */ +static int /* regexec return code */ +citerdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ +{ + struct dfa *d; + chr **endpts; + chr *limit; + int min_matches; + size_t max_matches; + int nverified; + int k; + int i; + int er; + + assert(t->op == '*'); + assert(t->left != NULL && t->left->cnfa.nstates > 0); + assert(!(t->left->flags & SHORTER)); + assert(begin <= end); + + /* + * If zero matches are allowed, and target string is empty, just declare + * victory. OTOH, if target string isn't empty, zero matches can't work + * so we pretend the min is 1. + */ + min_matches = t->min; + if (min_matches <= 0) { + if (begin == end) + return REG_OKAY; + min_matches = 1; } - assert(t->op == '|'); - if (v->mem[t->retry] == TRIED) { - goto doRight; + + /* + * We need workspace to track the endpoints of each sub-match. Normally + * we consider only nonzero-length sub-matches, so there can be at most + * end-begin of them. However, if min is larger than that, we will also + * consider zero-length sub-matches in order to find enough matches. + * + * For convenience, endpts[0] contains the "begin" pointer and we store + * sub-match endpoints in endpts[1..max_matches]. + */ + max_matches = end - begin; + if (max_matches > t->max && t->max != DUPINF) + max_matches = t->max; + if (max_matches < min_matches) + max_matches = min_matches; + endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); + if (endpts == NULL) + return REG_ESPACE; + endpts[0] = begin; + + d = getsubdfa(v, t->left); + if (ISERR()) { + FREE(endpts); + return v->err; } + MDEBUG(("citer %d\n", t->id)); + + /* + * Our strategy is to first find a set of sub-match endpoints that are + * valid according to the child node's DFA, and then recursively dissect + * each sub-match to confirm validity. If any validity check fails, + * backtrack the last sub-match and try again. And, when we next try for + * a validity check, we need not recheck any successfully verified + * sub-matches that we didn't move the endpoints of. nverified remembers + * how many sub-matches are currently known okay. + */ - MDEBUG(("cAlt n%d\n", t->retry)); - assert(t->left != NULL); + /* initialize to consider first sub-match */ + nverified = 0; + k = 1; + limit = end; + + /* iterate until satisfaction or failure */ + while (k > 0) { + /* try to find an endpoint for the k'th sub-match */ + endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL); + if (endpts[k] == NULL) { + /* no match possible, so see if we can shorten previous one */ + k--; + goto backtrack; + } + MDEBUG(("%d: working endpoint %d: %ld\n", + t->id, k, LOFF(endpts[k]))); + + /* k'th sub-match can no longer be considered verified */ + if (nverified >= k) + nverified = k - 1; + + if (endpts[k] != end) { + /* haven't reached end yet, try another iteration if allowed */ + if (k >= max_matches) { + /* must try to shorten some previous match */ + k--; + goto backtrack; + } - if (v->mem[t->retry] == UNTRIED) { - struct dfa *d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); + /* reject zero-length match unless necessary to achieve min */ + if (endpts[k] == endpts[k - 1] && + (k >= min_matches || min_matches - k < end - endpts[k])) + goto backtrack; - if (ISERR()) { - return v->err; + k++; + limit = end; + continue; } - if (longest(v, d, begin, end, NULL) != end) { - freeDFA(d); - v->mem[t->retry] = TRIED; - goto doRight; + + /* + * We've identified a way to divide the string into k sub-matches + * that works so far as the child DFA can tell. If k is an allowed + * number of matches, start the slow part: recurse to verify each + * sub-match. We always have k <= max_matches, needn't check that. + */ + if (k < min_matches) + goto backtrack; + + MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k)); + + for (i = nverified + 1; i <= k; i++) { + zaptreesubs(v, t->left); + er = cdissect(v, t->left, endpts[i - 1], endpts[i]); + if (er == REG_OKAY) { + nverified = i; + continue; + } + if (er == REG_NOMATCH) + break; + /* oops, something failed */ + FREE(endpts); + return er; + } + + if (i > k) { + /* satisfaction */ + MDEBUG(("%d successful\n", t->id)); + FREE(endpts); + return REG_OKAY; } - freeDFA(d); - MDEBUG(("cAlt matched\n")); - v->mem[t->retry] = TRYING; + + /* match failed to verify, so backtrack */ + + backtrack: + /* + * Must consider shorter versions of the current sub-match. However, + * we'll only ask for a zero-length match if necessary. + */ + while (k > 0) { + chr *prev_end = endpts[k - 1]; + + if (endpts[k] > prev_end) { + limit = endpts[k] - 1; + if (limit > prev_end || + (k < min_matches && min_matches - k >= end - prev_end)) { + /* break out of backtrack loop, continue the outer one */ + break; + } + } + /* can't shorten k'th sub-match any more, consider previous one */ + k--; + } + } + + /* all possibilities exhausted */ + MDEBUG(("%d failed\n", t->id)); + FREE(endpts); + return REG_NOMATCH; +} + +/* + - creviterdissect - dissect match for iteration node, shortest-first + ^ static int creviterdissect(struct vars *, struct subre *, chr *, chr *); + */ +static int /* regexec return code */ +creviterdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ +{ + struct dfa *d; + chr **endpts; + chr *limit; + int min_matches; + size_t max_matches; + int nverified; + int k; + int i; + int er; + + assert(t->op == '*'); + assert(t->left != NULL && t->left->cnfa.nstates > 0); + assert(t->left->flags & SHORTER); + assert(begin <= end); + + /* + * If zero matches are allowed, and target string is empty, just declare + * victory. OTOH, if target string isn't empty, zero matches can't work + * so we pretend the min is 1. + */ + min_matches = t->min; + if (min_matches <= 0) { + if (begin == end) + return REG_OKAY; + min_matches = 1; } - er = complicatedDissect(v, t->left, begin, end); - if (er != REG_NOMATCH) { - return er; + /* + * We need workspace to track the endpoints of each sub-match. Normally + * we consider only nonzero-length sub-matches, so there can be at most + * end-begin of them. However, if min is larger than that, we will also + * consider zero-length sub-matches in order to find enough matches. + * + * For convenience, endpts[0] contains the "begin" pointer and we store + * sub-match endpoints in endpts[1..max_matches]. + */ + max_matches = end - begin; + if (max_matches > t->max && t->max != DUPINF) + max_matches = t->max; + if (max_matches < min_matches) + max_matches = min_matches; + endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); + if (endpts == NULL) + return REG_ESPACE; + endpts[0] = begin; + + d = getsubdfa(v, t->left); + if (ISERR()) { + FREE(endpts); + return v->err; } + MDEBUG(("creviter %d\n", t->id)); + + /* + * Our strategy is to first find a set of sub-match endpoints that are + * valid according to the child node's DFA, and then recursively dissect + * each sub-match to confirm validity. If any validity check fails, + * backtrack the last sub-match and try again. And, when we next try for + * a validity check, we need not recheck any successfully verified + * sub-matches that we didn't move the endpoints of. nverified remembers + * how many sub-matches are currently known okay. + */ + + /* initialize to consider first sub-match */ + nverified = 0; + k = 1; + limit = begin; + + /* iterate until satisfaction or failure */ + while (k > 0) { + /* disallow zero-length match unless necessary to achieve min */ + if (limit == endpts[k - 1] && + limit != end && + (k >= min_matches || min_matches - k < end - limit)) + limit++; + + /* if this is the last allowed sub-match, it must reach to the end */ + if (k >= max_matches) + limit = end; + + /* try to find an endpoint for the k'th sub-match */ + endpts[k] = shortest(v, d, endpts[k - 1], limit, end, + (chr **) NULL, (int *) NULL); + if (endpts[k] == NULL) { + /* no match possible, so see if we can lengthen previous one */ + k--; + goto backtrack; + } + MDEBUG(("%d: working endpoint %d: %ld\n", + t->id, k, LOFF(endpts[k]))); + + /* k'th sub-match can no longer be considered verified */ + if (nverified >= k) + nverified = k - 1; + + if (endpts[k] != end) { + /* haven't reached end yet, try another iteration if allowed */ + if (k >= max_matches) { + /* must try to lengthen some previous match */ + k--; + goto backtrack; + } + + k++; + limit = endpts[k - 1]; + continue; + } - v->mem[t->retry] = TRIED; -#ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION - goto doRight; -#else - doRight: - return complicatedAlternationDissect(v, t->right, begin, end); -#endif + /* + * We've identified a way to divide the string into k sub-matches + * that works so far as the child DFA can tell. If k is an allowed + * number of matches, start the slow part: recurse to verify each + * sub-match. We always have k <= max_matches, needn't check that. + */ + if (k < min_matches) + goto backtrack; + + MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k)); + + for (i = nverified + 1; i <= k; i++) { + zaptreesubs(v, t->left); + er = cdissect(v, t->left, endpts[i - 1], endpts[i]); + if (er == REG_OKAY) { + nverified = i; + continue; + } + if (er == REG_NOMATCH) + break; + /* oops, something failed */ + FREE(endpts); + return er; + } + + if (i > k) { + /* satisfaction */ + MDEBUG(("%d successful\n", t->id)); + FREE(endpts); + return REG_OKAY; + } + + /* match failed to verify, so backtrack */ + + backtrack: + /* + * Must consider longer versions of the current sub-match. + */ + while (k > 0) { + if (endpts[k] < end) { + limit = endpts[k] + 1; + /* break out of backtrack loop, continue the outer one */ + break; + } + /* can't lengthen k'th sub-match any more, consider previous one */ + k--; + } + } + + /* all possibilities exhausted */ + MDEBUG(("%d failed\n", t->id)); + FREE(endpts); + return REG_NOMATCH; } #include "rege_dfa.c" diff --git a/generic/regguts.h b/generic/regguts.h index c51e3f1..bb7a832 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -160,7 +160,14 @@ struct colordesc { union tree *block; /* block of solid color, if any */ }; -/* the color map itself */ +/* + * The color map itself + * + * Much of the data in the colormap struct is only used at compile time. + * However, the bulk of the space usage is in the "tree" structure, so it's + * not clear that there's much point in converting the rest to a more compact + * form when compilation is finished. + */ struct colormap { int magic; #define CMMAGIC 0x876 @@ -216,15 +223,14 @@ struct cvec { struct state; struct arc { - int type; -#define ARCFREE '\0' + int type; /* 0 if free, else an NFA arc type code */ color co; struct state *from; /* where it's from (and contained within) */ struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ + struct arc *outchain; /* link in *from's outs chain or free chain */ #define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ + struct arc *inchain; /* link in *to's ins chain */ + struct arc *colorchain; /* link in color's arc chain */ struct arc *colorchainRev; /* back-link in color's arc chain */ }; @@ -271,11 +277,22 @@ struct nfa { /* * definitions for compacted NFA + * + * The main space savings in a compacted NFA is from making the arcs as small + * as possible. We store only the transition color and next-state number for + * each arc. The list of out arcs for each state is an array beginning at + * cnfa.states[statenumber], and terminated by a dummy carc struct with + * co == COLORLESS. + * + * The non-dummy carc structs are of two types: plain arcs and LACON arcs. + * Plain arcs just store the transition color number as "co". LACON arcs + * store the lookahead constraint number plus cnfa.ncolors as "co". LACON + * arcs can be distinguished from plain by testing for co >= cnfa.ncolors. */ struct carc { color co; /* COLORLESS is list terminator */ - int to; /* state number */ + int to; /* next-state number */ }; struct cnfa { @@ -287,7 +304,10 @@ struct cnfa { int post; /* teardown state number */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ + char *stflags; /* vector of per-state flags bytes */ +#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */ struct carc **states; /* vector of pointers to outarc lists */ + /* states[n] are pointers into a single malloc'd array of arcs */ struct carc *arcs; /* the area for the lists */ }; #define ZAPCNFA(cnfa) ((cnfa).nstates = 0) @@ -303,11 +323,28 @@ struct cnfa { /* * subexpression tree + * + * "op" is one of: + * '=' plain regex without interesting substructure (implemented as DFA) + * 'b' back-reference (has no substructure either) + * '(' capture node: captures the match of its single child + * '.' concatenation: matches a match for left, then a match for right + * '|' alternation: matches a match for left or a match for right + * '*' iteration: matches some number of matches of its single child + * + * Note: the right child of an alternation must be another alternation or + * NULL; hence, an N-way branch requires N alternation nodes, not N-1 as you + * might expect. This could stand to be changed. Actually I'd rather see + * a single alternation node with N children, but that will take revising + * the representation of struct subre. + * + * Note: when a backref is directly quantified, we stick the min/max counts + * into the backref rather than plastering an iteration node on top. This is + * for efficiency: there is no need to search for possible division points. */ struct subre { - char op; /* '|', '.' (concat), 'b' (backref), '(', - * '=' */ + char op; /* see type codes above */ char flags; #define LONGER 01 /* prefers longer match */ #define SHORTER 02 /* prefers shorter match */ @@ -323,10 +360,10 @@ struct subre { #define PREF(f) ((f)&NOPROP) #define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) #define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) - short retry; /* index into retry memory */ + short id; /* ID of subre (1..ntree-1) */ int subno; /* subexpression number (for 'b' and '(') */ - short min; /* min repetitions, for backref only */ - short max; /* max repetitions, for backref only */ + short min; /* min repetitions for iteration or backref */ + short max; /* max repetitions for iteration or backref */ struct subre *left; /* left child, if any (also freelist chain) */ struct subre *right; /* right child, if any */ struct state *begin; /* outarcs from here... */ @@ -356,7 +393,7 @@ struct guts { size_t nsub; /* copy of re_nsub */ struct subre *tree; struct cnfa search; /* for fast preliminary search */ - int ntree; + int ntree; /* number of subre's, plus one */ struct colormap cmap; int (*compare) (const chr *, const chr *, size_t); struct subre *lacons; /* lookahead-constraint vector */ diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c index 0a325b3..15502e7 100644 --- a/generic/tclDisassemble.c +++ b/generic/tclDisassemble.c @@ -794,6 +794,7 @@ PrintSourceToObj( { register const char *p; register int i = 0, len; + Tcl_UniChar ch = 0; if (stringPtr == NULL) { Tcl_AppendToObj(appendObj, "\"\"", -1); @@ -803,7 +804,6 @@ PrintSourceToObj( Tcl_AppendToObj(appendObj, "\"", -1); p = stringPtr; for (; (*p != '\0') && (i < maxChars); p+=len) { - Tcl_UniChar ch; len = TclUtfToUniChar(p, &ch); switch (ch) { @@ -832,6 +832,12 @@ PrintSourceToObj( i += 2; continue; default: +#if TCL_UTF_MAX > 4 + if ((int) ch > 0xffff) { + Tcl_AppendPrintfToObj(appendObj, "\\U%08x", (int) ch); + i += 10; + } else +#endif if (ch < 0x20 || ch >= 0x7f) { Tcl_AppendPrintfToObj(appendObj, "\\u%04x", ch); i += 6; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 501fc57..b832d02 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2463,22 +2463,35 @@ UtfToUnicodeProc( if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; - } + } src += TclUtfToUniChar(src, &ch); /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] - * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. */ #ifdef WORDS_BIGENDIAN +#if TCL_UTF_MAX > 4 + *dst++ = (ch >> 24); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = (ch & 0xFF); +#else *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); +#endif +#else +#if TCL_UTF_MAX > 4 + *dst++ = (ch & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = ((ch >> 16) & 0xFF); + *dst++ = (ch >> 24); #else *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); #endif +#endif } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; diff --git a/generic/tclExecute.c b/generic/tclExecute.c index d285816..5378113 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -4054,7 +4054,6 @@ TEBCresume( } else if (flags & TCL_LEAVE_ERR_MSG) { goto slowUnsetArray; } - varPtr->value.objPtr = NULL; TRACE_APPEND(("OK\n")); NEXT_INST_F(6, 1, 0); } else if (!varPtr && !(flags & TCL_LEAVE_ERR_MSG)) { diff --git a/generic/tclMain.c b/generic/tclMain.c index 82a5df3..70f705b 100644 --- a/generic/tclMain.c +++ b/generic/tclMain.c @@ -80,7 +80,10 @@ NewNativeObj( * source directory to make their own modified versions). */ +#if defined _MSC_VER && _MSC_VER < 1900 +/* isatty is always defined on MSVC 14.0, but not necessarily as CRTIMPORT. */ extern CRTIMPORT int isatty(int fd); +#endif /* * The thread-local variables for this file's functions. diff --git a/generic/tclThread.c b/generic/tclThread.c index c2112b7..7882a48 100644 --- a/generic/tclThread.c +++ b/generic/tclThread.c @@ -304,7 +304,9 @@ void TclMutexUnlockAndFinalize( Tcl_Mutex *mutexPtr) { +#ifdef TCL_THREADS Tcl_Mutex mutex; +#endif TclpMasterLock(); TclpMutexLock(); #ifdef TCL_THREADS diff --git a/generic/tclUniData.c b/generic/tclUniData.c index d2f66fe..1ca119d 100644 --- a/generic/tclUniData.c +++ b/generic/tclUniData.c @@ -1556,4 +1556,8 @@ enum { * Unicode character tables. */ -#define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0xffff) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) +#if TCL_UTF_MAX > 3 +# define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0x1fffff) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) +#else +# define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0xffff) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) +#endif diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 48a56cd..88d911a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -117,19 +117,10 @@ UtfCount( if (ch <= 0x7FF) { return 2; } - if (ch <= 0xFFFF) { - return 3; - } #if TCL_UTF_MAX > 3 - if (ch <= 0x1FFFFF) { + if ((ch > 0xFFFF) && (ch <= 0x10FFFF)) { return 4; } - if (ch <= 0x3FFFFFF) { - return 5; - } - if (ch <= 0x7FFFFFFF) { - return 6; - } #endif return 3; } @@ -172,6 +163,23 @@ Tcl_UniCharToUtf( return 2; } if (ch <= 0xFFFF) { +#if TCL_UTF_MAX == 4 + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x0400) { + /* Low surrogate */ + buf[3] = (char) ((ch | 0x80) & 0xBF); + buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F); + return 4; + } else { + /* High surrogate */ + ch += 0x40; + buf[2] = (char) (((ch << 4) | 0x80) & 0xB0); + buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF); + buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7); + return 0; + } + } +#endif three: buf[2] = (char) ((ch | 0x80) & 0xBF); buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); @@ -180,30 +188,13 @@ Tcl_UniCharToUtf( } #if TCL_UTF_MAX > 3 - if (ch <= 0x1FFFFF) { + if (ch <= 0x10FFFF) { buf[3] = (char) ((ch | 0x80) & 0xBF); buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); buf[0] = (char) ((ch >> 18) | 0xF0); return 4; } - if (ch <= 0x3FFFFFF) { - buf[4] = (char) ((ch | 0x80) & 0xBF); - buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 24) | 0xF8); - return 5; - } - if (ch <= 0x7FFFFFFF) { - buf[5] = (char) ((ch | 0x80) & 0xBF); - buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 30) | 0xFC); - return 6; - } #endif } @@ -1365,6 +1356,11 @@ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1); } @@ -1388,6 +1384,11 @@ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return ((ALPHA_BITS >> GetCategory(ch)) & 1); } @@ -1411,6 +1412,18 @@ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + ch &= 0x1fffff; + if ((ch == 0xe0001) || ((ch >= 0xe0020) && (ch <= 0xe007f))) { + return 1; + } + if ((ch >= 0xf0000) && ((ch & 0xffff) <= 0xfffd)) { + return 1; + } + return 0; + } +#endif return ((CONTROL_BITS >> GetCategory(ch)) & 1); } @@ -1434,6 +1447,11 @@ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER); } @@ -1457,6 +1475,12 @@ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + ch &= 0x1fffff; + return (ch >= 0xe0100) && (ch <= 0xe01ef); + } +#endif return ((GRAPH_BITS >> GetCategory(ch)) & 1); } @@ -1480,6 +1504,11 @@ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (GetCategory(ch) == LOWERCASE_LETTER); } @@ -1503,6 +1532,12 @@ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + ch &= 0x1fffff; + return (ch >= 0xe0100) && (ch <= 0xe01ef); + } +#endif return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1); } @@ -1526,6 +1561,11 @@ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return ((PUNCT_BITS >> GetCategory(ch)) & 1); } @@ -1549,16 +1589,27 @@ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + /* Ignore upper 11 bits. */ + ch &= 0x1fffff; +#else + /* Ignore upper 16 bits. */ + ch &= 0xffff; +#endif + /* * If the character is within the first 127 characters, just use the * standard C function, otherwise consult the Unicode table. */ - if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) { + if (ch < 0x80) { return TclIsSpaceProc((char) ch); - } else if ((Tcl_UniChar) ch == 0x0085 || (Tcl_UniChar) ch == 0x180e - || (Tcl_UniChar) ch == 0x200b || (Tcl_UniChar) ch == 0x2060 - || (Tcl_UniChar) ch == 0x202f || (Tcl_UniChar) ch == 0xfeff) { +#if TCL_UTF_MAX > 3 + } else if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; +#endif + } else if (ch == 0x0085 || ch == 0x180e || ch == 0x200b + || ch == 0x202f || ch == 0x2060 || ch == 0xfeff) { return 1; } else { return ((SPACE_BITS >> GetCategory(ch)) & 1); @@ -1585,6 +1636,11 @@ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (GetCategory(ch) == UPPERCASE_LETTER); } @@ -1608,6 +1664,11 @@ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return ((WORD_BITS >> GetCategory(ch)) & 1); } diff --git a/library/init.tcl b/library/init.tcl index 6039594..8daab9d 100644 --- a/library/init.tcl +++ b/library/init.tcl @@ -12,7 +12,7 @@ # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# This test intentionally written in pre-7.5 Tcl +# This test intentionally written in pre-7.5 Tcl if {[info commands package] == ""} { error "version mismatch: library\nscripts expect Tcl version 7.5b1 or later but the loaded version is\nonly [info patchlevel]" } @@ -332,7 +332,7 @@ proc unknown args { } } - if {([info level] == 1) && ([info script] eq "") + if {([info level] == 1) && ([info script] eq "") && [info exists tcl_interactive] && $tcl_interactive} { if {![info exists auto_noexec]} { set new [auto_execok $name] diff --git a/library/msgcat/msgcat.tcl b/library/msgcat/msgcat.tcl index 2a66e9a..928474d 100644 --- a/library/msgcat/msgcat.tcl +++ b/library/msgcat/msgcat.tcl @@ -262,7 +262,7 @@ proc msgcat::mcexists {args} { } } set src [lindex $args 0] - + while {$ns ne ""} { foreach loc $loclist { if {[dict exists $Msgs $ns $loc $src]} { @@ -305,7 +305,7 @@ proc msgcat::mclocale {args} { } if {[lindex $Loclist 0] ne $newLocale} { set Loclist [GetPreferences $newLocale] - + # locale not loaded jet LoadAll $Loclist # Invoke callback @@ -463,7 +463,7 @@ proc msgcat::mcpackagelocale {subcommand {locale ""}} { set locale [string tolower $locale] } set ns [uplevel 1 {::namespace current}] - + switch -exact -- $subcommand { get { return [lindex [PackagePreferences $ns] 0] } preferences { return [PackagePreferences $ns] } @@ -646,7 +646,7 @@ proc msgcat::mcpackageconfig {subcommand option {value ""}} { isset { return [dict exists $PackageConfig $option $ns] } unset { dict unset PackageConfig $option $ns } set { # Set option - + if {$option eq "mcfolder"} { set value [file normalize $value] } @@ -779,7 +779,7 @@ proc msgcat::LoadAll {locales} { set locales [ListComplement $LoadedLocales $locales] if {0 == [llength $locales]} { return {} } lappend LoadedLocales {*}$locales - + set packages [lsort -unique [concat\ [dict keys [dict get $PackageConfig loadcmd]]\ [dict keys [dict get $PackageConfig mcfolder]]]] @@ -812,14 +812,14 @@ proc msgcat::Load {ns locales {callbackonly 0}} { # Invoke callback Invoke loadcmd $locales $ns - + if {$callbackonly || ![dict exists $PackageConfig mcfolder $ns]} { return 0 } # Invoke file load set langdir [dict get $PackageConfig mcfolder $ns] - + # Save the file locale if we are recursively called if {[info exists FileLocale]} { set nestedFileLocale $FileLocale diff --git a/library/opt/optparse.tcl b/library/opt/optparse.tcl index 163bc7d..f628487 100644 --- a/library/opt/optparse.tcl +++ b/library/opt/optparse.tcl @@ -435,7 +435,7 @@ proc ::tcl::OptProcArgGiven {argname} { } elseif {$state == "optValue"} { set state next; # not used, for debug only # go to next state - return + return } else { return -code error [OptMissingValue $descriptions] } @@ -538,7 +538,7 @@ proc ::tcl::OptKeyParse {descKey arglist} { # Analyse the result # Walk through the tree: - OptTreeVars $desc "#[expr {[info level]-1}]" + OptTreeVars $desc "#[expr {[info level]-1}]" } # determine string length for nice tabulated output diff --git a/library/package.tcl b/library/package.tcl index a94bfaa..595fc5c 100644 --- a/library/package.tcl +++ b/library/package.tcl @@ -729,7 +729,7 @@ proc ::tcl::Pkg::Create {args} { foreach key {load source} { foreach filespec $opts(-$key) { lassign $filespec filename proclist - + if { [llength $proclist] == 0 } { set cmd "\[list $key \[file join \$dir [list $filename]\]\]" lappend cmdList $cmd diff --git a/library/platform/platform.tcl b/library/platform/platform.tcl index 1bce7b5..e0bcce6 100644 --- a/library/platform/platform.tcl +++ b/library/platform/platform.tcl @@ -323,7 +323,7 @@ proc ::platform::patterns {id} { lappend res macosx-universal macosx-i386-x86_64 } macosx*-* { - # 10.5+ + # 10.5+ if {[regexp {macosx([^-]*)-(.*)} $id -> v cpu]} { switch -exact -- $cpu { diff --git a/library/safe.tcl b/library/safe.tcl index 394aa97..ea6391d 100644 --- a/library/safe.tcl +++ b/library/safe.tcl @@ -853,7 +853,7 @@ proc ::safe::AliasSource {slave args} { return -code error $msg } set file [lindex $args $at] - + # get the real path from the virtual one. if {[catch { set realfile [TranslatePath $slave $file] @@ -861,7 +861,7 @@ proc ::safe::AliasSource {slave args} { Log $slave $msg return -code error "permission denied" } - + # check that the path is in the access path of that slave if {[catch { FileInAccessPath $slave $realfile diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index f2b3317..a2821d7 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -347,7 +347,7 @@ namespace eval tcltest { # This is very subtle and tricky, so let me try to explain. # (Hopefully this longer comment will be clear when I come # back in a few months, unlike its predecessor :) ) - # + # # The [outputChannel] command (and underlying variable) have to # be kept in sync with the [configure -outfile] configuration # option ( and underlying variable Option(-outfile) ). This is @@ -362,12 +362,12 @@ namespace eval tcltest { # configuration options to parse the command line option the first # time they are read. These traces are cancelled whenever the # program itself calls [configure]. - # + # # OK, then so to support tcltest 1 compatibility, it seems we want # to get the return from [outputFile] to trigger the read traces, # just in case. # - # BUT! A little known feature of Tcl variable traces is that + # BUT! A little known feature of Tcl variable traces is that # traces are disabled during the handling of other traces. So, # if we trigger read traces on Option(-outfile) and that triggers # command line parsing which turns around and sets an initial @@ -608,7 +608,7 @@ namespace eval tcltest { set code [catch {Configure {*}$args} msg] return -code $code $msg } - + proc AcceptVerbose { level } { set level [AcceptList $level] if {[llength $level] == 1} { @@ -639,7 +639,7 @@ namespace eval tcltest { skipped tests if 's' is specified, the bodies of failed tests if 'b' is specified, and when tests start if 't' is specified. ErrorInfo is displayed if 'e' is specified. Source file line - information of failed tests is displayed if 'l' is specified. + information of failed tests is displayed if 'l' is specified. } AcceptVerbose verbose # Match and skip patterns default to the empty list, except for @@ -687,7 +687,7 @@ namespace eval tcltest { # some additional output regarding operations of the test harness. # The tcltest package currently implements only up to debug level 3. Option -debug 0 { - Internal debug level + Internal debug level } AcceptInteger debug proc SetSelectedConstraints args { @@ -715,7 +715,7 @@ namespace eval tcltest { } Option -limitconstraints 0 { whether to run only tests with the constraints - } AcceptBoolean limitConstraints + } AcceptBoolean limitConstraints trace add variable Option(-limitconstraints) write \ [namespace code {ClearUnselectedConstraints ;#}] @@ -728,7 +728,7 @@ namespace eval tcltest { # Default is to run each test file in a separate process Option -singleproc 0 { whether to run all tests in one process - } AcceptBoolean singleProcess + } AcceptBoolean singleProcess proc AcceptTemporaryDirectory { directory } { set directory [AcceptAbsolutePath $directory] @@ -1257,7 +1257,7 @@ proc tcltest::DefineConstraintInitializers {} { # setting files into nonblocking mode. ConstraintInitializer nonBlockFiles { - set code [expr {[catch {set f [open defs r]}] + set code [expr {[catch {set f [open defs r]}] || [catch {chan configure $f -blocking off}]}] catch {close $f} set code @@ -1271,7 +1271,7 @@ proc tcltest::DefineConstraintInitializers {} { # (Mark Diekhans). ConstraintInitializer asyncPipeClose {expr { - !([string equal unix $::tcl_platform(platform)] + !([string equal unix $::tcl_platform(platform)] && ([catch {exec uname -X | fgrep {Release = 3.2v}}] == 0))}} # Test to see if we have a broken version of sprintf with respect @@ -1954,7 +1954,7 @@ proc tcltest::test {name description args} { return } - # Save information about the core file. + # Save information about the core file. if {[preserveCore]} { if {[file exists [file join [workingDirectory] core]]} { set coreModTime [file mtime [file join [workingDirectory] core]] @@ -2060,7 +2060,7 @@ proc tcltest::test {name description args} { } else { set coreFailure 1 } - + if {([preserveCore] > 1) && ($coreFailure)} { append coreMsg "\nMoving file to:\ [file join [temporaryDirectory] core-$name]" @@ -2100,7 +2100,7 @@ proc tcltest::test {name description args} { variable currentFailure true if {![IsVerbose body]} { set body "" - } + } puts [outputChannel] "\n" if {[IsVerbose line]} { if {![catch {set testFrame [info frame -1]}] && @@ -2121,7 +2121,7 @@ proc tcltest::test {name description args} { puts [outputChannel] "$testFile:$testLine: error: test failed:\ $name [string trim $description]" } - } + } puts [outputChannel] "==== $name\ [string trim $description] FAILED" if {[string length $body]} { @@ -2277,7 +2277,7 @@ proc tcltest::Skipped {name constraints} { } } } - + if {!$doTest} { if {[IsVerbose skip]} { puts [outputChannel] "++++ $name SKIPPED: $constraints" @@ -2834,9 +2834,9 @@ proc tcltest::runAllTests { {shell ""} } { set dir [file tail $directory] puts [outputChannel] [string repeat ~ 44] puts [outputChannel] "$dir test began at [eval $timeCmd]\n" - + uplevel 1 [list ::source [file join $directory all.tcl]] - + set endTime [eval $timeCmd] puts [outputChannel] "\n$dir test ended at $endTime" puts [outputChannel] "" @@ -3019,7 +3019,7 @@ proc tcltest::removeFile {name {directory ""}} { DebugDo 1 { Warn "removeFile removing \"$fullName\":\n not created by makeFile" } - } + } if {![file isfile $fullName]} { DebugDo 1 { Warn "removeFile removing \"$fullName\":\n not a file" @@ -3090,7 +3090,7 @@ proc tcltest::removeDirectory {name {directory ""}} { Warn "removeDirectory removing \"$fullName\":\n not created\ by makeDirectory" } - } + } if {![file isdirectory $fullName]} { DebugDo 1 { Warn "removeDirectory removing \"$fullName\":\n not a directory" @@ -3285,7 +3285,7 @@ proc tcltest::threadReap {} { testthread errorproc ThreadError return [llength [testthread names]] } elseif {[info commands thread::id] ne {}} { - + # Thread extension thread::errorproc ThreadNullError diff --git a/tests/httpd11.tcl b/tests/httpd11.tcl index b1d7947..7880494 100644 --- a/tests/httpd11.tcl +++ b/tests/httpd11.tcl @@ -44,7 +44,7 @@ proc get-chunks {data {compression gzip}} { deflate { set data [zlib deflate $data] } compress { set data [zlib compress $data] } } - + set data "" set chunker [make-chunk-generator $data 512] while {[string length [set chunk [$chunker]]]} { @@ -59,7 +59,7 @@ proc blow-chunks {data {ochan stdout} {compression gzip}} { deflate { set data [zlib deflate $data] } compress { set data [zlib compress $data] } } - + set chunker [make-chunk-generator $data 512] while {[string length [set chunk [$chunker]]]} { puts -nonewline $ochan $chunk @@ -156,7 +156,7 @@ proc Service {chan addr port} { set code "200 OK" set close [expr {[dict get? $meta connection] eq "close"}] } - + if {$protocol eq "HTTP/1.1"} { foreach enc [split [dict get? $meta accept-encoding] ,] { set enc [string trim $enc] @@ -169,7 +169,7 @@ proc Service {chan addr port} { } else { set close 1 } - + foreach pair [split $query &] { if {[scan $pair {%[^=]=%s} key val] != 2} {set val ""} switch -exact -- $key { @@ -209,7 +209,7 @@ proc Service {chan addr port} { } else { puts -nonewline $chan $data } - + if {$close} { chan event $chan readable {} close $chan diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 4fbc380..cd89a02 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -349,7 +349,7 @@ test iocmd-8.19 {fconfigure command / win tty channel} -constraints {nonPortable close $tty } } -returnCodes error -result {bad option "-blah": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -translation, -mode, -handshake, -pollinterval, -sysbuffer, -timeout, -ttycontrol, or -xchar} -# TODO: Test parsing of serial channel options (nonportable, since requires an +# TODO: Test parsing of serial channel options (nonPortable, since requires an # open channel to work with). test iocmd-9.1 {eof command} { @@ -3770,7 +3770,7 @@ test iocmd.tf-32.0 {origin thread of moved channel gone} -match glob -body { # The test iocmd.tf-32.1 unavoidably exhibits a memory leak. We are testing # the ability of the reflected channel system to react to the situation where # the thread in which the driver routines runs exits during driver operations. -# In this case, thread exit handlers signal back to the owner thread so that the +# In this case, thread exit handlers signal back to the owner thread so that the # channel operation does not hang. There's no way to test this without actually # exiting a thread in mid-operation, and that action is unavoidably leaky (which # is why [thread::exit] is advised against). diff --git a/tests/reg.test b/tests/reg.test index 5cc81fe..f028a53 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -663,7 +663,13 @@ expectError 14.19 - {a(b)c\2} ESUBREG expectMatch 14.20 bR {a\(b*\)c\1} abbcbb abbcbb bb expectMatch 14.21 RP {^([bc])\1*$} bbb bbb b expectMatch 14.22 RP {^([bc])\1*$} ccc ccc c -knownBug expectNomatch 14.23 R {^([bc])\1*$} bcb +expectNomatch 14.23 RP {^([bc])\1*$} bcb +expectMatch 14.24 LRP {^(\w+)( \1)+$} {abc abc abc} {abc abc abc} abc { abc} +expectNomatch 14.25 LRP {^(\w+)( \1)+$} {abc abd abc} +expectNomatch 14.26 LRP {^(\w+)( \1)+$} {abc abc abd} +expectMatch 14.27 RP {^(.+)( \1)+$} {abc abc abc} {abc abc abc} abc { abc} +expectNomatch 14.28 RP {^(.+)( \1)+$} {abc abd abc} +expectNomatch 14.29 RP {^(.+)( \1)+$} {abc abc abd} doing 15 "octal escapes vs back references" @@ -790,6 +796,7 @@ expectMatch 21.31 LP "\\y(\\w+)\\y" "-- abc-" "abc" "abc" expectMatch 21.32 - a((b|c)d+)+ abacdbd acdbd bd b expectMatch 21.33 N (.*).* abc abc abc expectMatch 21.34 N (a*)* bc "" "" +expectMatch 21.35 M { TO (([a-z0-9._]+|"([^"]+|"")+")+)} {asd TO foo} { TO foo} foo o {} doing 22 "multicharacter collating elements" @@ -842,6 +849,7 @@ expectMatch 24.9 - 3z* 123zzzz456 3zzzz expectMatch 24.10 PT 3z*? 123zzzz456 3 expectMatch 24.11 - z*4 123zzzz456 zzzz4 expectMatch 24.12 PT z*?4 123zzzz456 zzzz4 +expectMatch 24.13 PT {^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$} {foo/bar/baz} {foo/bar/baz} {foo} {bar} {baz} doing 25 "mixed quantifiers" diff --git a/tests/socket.test b/tests/socket.test index 4f90e51..8473602 100644 --- a/tests/socket.test +++ b/tests/socket.test @@ -1794,7 +1794,7 @@ test socket_$af-13.1 {Testing use of shared socket between two threads} -body { close $s thread::release $serverthread append result " " [llength [thread::names]] -} -result {hello 1} -constraints [list socket supported_$af thread] +} -result {hello 1} -constraints [list socket supported_$af thread] # ---------------------------------------------------------------------- @@ -2249,7 +2249,7 @@ test socket-14.11.0 {pending [socket -async] and nonblocking [puts], no listener unset x } -result {socket is not connected} -returnCodes 1 test socket-14.11.1 {pending [socket -async] and nonblocking [puts], no listener, flush} \ - -constraints {socket nonportable} \ + -constraints {socket nonPortable} \ -body { set sock [socket -async localhost [randport]] fconfigure $sock -blocking 0 @@ -2281,10 +2281,10 @@ test socket-14.13 {testing writable event when quick failure} \ -constraints {socket win supported_inet} \ -body { # Test for bug 336441ed59 where a quick background fail was ignored - + # Test only for windows as socket -async 255.255.255.255 fails # directly on unix - + # The following connect should fail very quickly set a1 [after 2000 {set x timeout}] set s [socket -async 255.255.255.255 43434] @@ -2299,7 +2299,7 @@ test socket-14.13 {testing writable event when quick failure} \ test socket-14.14 {testing fileevent readable on failed async socket connect} \ -constraints {socket} -body { # Test for bug 581937ab1e - + set a1 [after 5000 {set x timeout}] # This connect should fail set s [socket -async localhost [randport]] diff --git a/tests/unixFCmd.test b/tests/unixFCmd.test index 2d227fe..183c145 100644 --- a/tests/unixFCmd.test +++ b/tests/unixFCmd.test @@ -385,7 +385,7 @@ file delete -force -- foo.test test unixFCmd-18.1 {Unix pwd} -constraints {unix notRoot nonPortable} -setup { set cd [pwd] } -body { - # This test is nonportable because SunOS generates a weird error + # This test is nonPortable because SunOS generates a weird error # message when the current directory isn't readable. set nd $cd/tstdir file mkdir $nd diff --git a/tools/findBadExternals.tcl b/tools/findBadExternals.tcl index 7592f17..2228357 100755 --- a/tools/findBadExternals.tcl +++ b/tools/findBadExternals.tcl @@ -1,5 +1,5 @@ # findBadExternals.tcl -- -# +# # This script scans the Tcl load library for exported symbols # that do not begin with 'Tcl' or 'tcl'. It reports them on the # standard output. It is used to make sure that the library does @@ -29,7 +29,7 @@ proc main {argc argv} { macosx { set status [catch { exec nm --extern-only --defined-only [lindex $argv 0] - } result] + } result] } windows { set status [catch { diff --git a/tools/loadICU.tcl b/tools/loadICU.tcl index 5b09e2c..31f1e54 100755 --- a/tools/loadICU.tcl +++ b/tools/loadICU.tcl @@ -432,7 +432,7 @@ proc handleLocaleFile { localeName fileName msgFileName } { if { ![info exists format($localeName,TIME_FORMAT)] } { for { set i 3 } { $i >= 0 } { incr i -1 } { - if { [regexp H [lindex $items(DateTimePatterns) $i]] + if { [regexp H [lindex $items(DateTimePatterns) $i]] && [regexp s [lindex $items(DateTimePatterns) $i]] } { break } @@ -464,7 +464,7 @@ proc handleLocaleFile { localeName fileName msgFileName } { if { ![info exists format($localeName,TIME_FORMAT_12)] } { for { set i 3 } { $i >= 0 } { incr i -1 } { - if { [regexp h [lindex $items(DateTimePatterns) $i]] + if { [regexp h [lindex $items(DateTimePatterns) $i]] && [regexp s [lindex $items(DateTimePatterns) $i]] } { break } @@ -489,7 +489,7 @@ proc handleLocaleFile { localeName fileName msgFileName } { # Date and time... Prefer 24-hour format to 12-hour format. - if { ![info exists format($localeName,DATE_TIME_FORMAT)] + if { ![info exists format($localeName,DATE_TIME_FORMAT)] && [info exists format($localeName,DATE_FORMAT)] && [info exists format($localeName,TIME_FORMAT)]} { set format($localeName,DATE_TIME_FORMAT) \ @@ -497,7 +497,7 @@ proc handleLocaleFile { localeName fileName msgFileName } { append format($localeName,DATE_TIME_FORMAT) \ " " $format($localeName,TIME_FORMAT) " %z" } - if { ![info exists format($localeName,DATE_TIME_FORMAT)] + if { ![info exists format($localeName,DATE_TIME_FORMAT)] && [info exists format($localeName,DATE_FORMAT)] && [info exists format($localeName,TIME_FORMAT_12)]} { set format($localeName,DATE_TIME_FORMAT) \ @@ -517,7 +517,7 @@ proc handleLocaleFile { localeName fileName msgFileName } { # Write the string sets to the file. - foreach key { + foreach key { LOCALE_NUMERALS LOCALE_DATE_FORMAT LOCALE_TIME_FORMAT LOCALE_DATE_TIME_FORMAT LOCALE_ERAS LOCALE_YEAR_FORMAT } { @@ -588,7 +588,7 @@ proc backslashify { string } { set retval {} foreach char [split $string {}] { scan $char %c ccode - if { $ccode >= 0x0020 && $ccode < 0x007f && $char ne "\"" + if { $ccode >= 0x0020 && $ccode < 0x007f && $char ne "\"" && $char ne "\{" && $char ne "\}" && $char ne "\[" && $char ne "\]" && $char ne "\\" && $char ne "\$" } { append retval $char diff --git a/tools/makeTestCases.tcl b/tools/makeTestCases.tcl index d96a221..6cc033b 100755 --- a/tools/makeTestCases.tcl +++ b/tools/makeTestCases.tcl @@ -40,7 +40,7 @@ namespace eval ::tcl::clock { l li lii liii liv lv lvi lvii lviii lix lx lxi lxii lxiii lxiv lxv lxvi lxvii lxviii lxix lxx lxxi lxxii lxxiii lxxiv lxxv lxxvi lxxvii lxxviii lxxix - lxxx lxxxi lxxxii lxxxiii lxxxiv lxxxv lxxxvi lxxxvii lxxxviii + lxxx lxxxi lxxxii lxxxiii lxxxiv lxxxv lxxxvi lxxxvii lxxxviii lxxxix xc xci xcii xciii xciv xcv xcvi xcvii xcviii xcix c @@ -62,7 +62,7 @@ namespace eval ::tcl::clock { # # Parameters: # startOfYearArray - Name of an array in caller's scope that will -# be initialized as +# be initialized as # Results: # None # @@ -106,7 +106,7 @@ proc listYears { startOfYearArray } { set s $s2 incr y } - + # List years before 1970 set y 1970 @@ -138,7 +138,7 @@ proc listYears { startOfYearArray } { #---------------------------------------------------------------------- # -# processFile - +# processFile - # # Processes the 'clock.test' file, updating the test cases in it. # @@ -153,7 +153,7 @@ proc listYears { startOfYearArray } { proc processFile {d} { # Open two files - + set f1 [open [file join $d tests/clock.test] r] set f2 [open [file join $d tests/clock.new] w] @@ -164,7 +164,7 @@ proc processFile {d} { switch -exact -- $state { {} { puts $f2 $line - if { [regexp "^\# BEGIN (.*)" $line -> cases] + if { [regexp "^\# BEGIN (.*)" $line -> cases] && [string compare {} [info commands $cases]] } { set state inCaseSet $cases $f2 @@ -213,7 +213,7 @@ proc testcases2 { f2 } { listYears startOfYear # Define the roman numerals - + set roman { ? i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix @@ -235,20 +235,20 @@ proc testcases2 { f2 } { } # Names of the months - + set short {{} Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec} set long { {} January February March April May June July August September October November December } - + # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test formatting of Gregorian year, month, day, all formats" puts $f2 "\# Formats tested: %b %B %c %Ec %C %EC %d %Od %e %Oe %h %j %J %m %Om %N %x %Ex %y %Oy %Y %EY" puts $f2 "" - + # Generate the test cases for the first and last day of every month # from 1896 to 2045 @@ -262,7 +262,7 @@ proc testcases2 { f2 } { if { $m == 2 && ( $y%4 == 0 && $y%100 != 0 || $y%400 == 0 ) } { incr hath } - + set b [lindex $short $m] set B [lindex $long $m] set C [format %02d [expr { $y / 100 }]] @@ -271,9 +271,9 @@ proc testcases2 { f2 } { set mm [format %02d $m] set N [format %2d $m] set yy [format %02d [expr { $y % 100 }]] - + set J [expr { ( $s / 86400 ) + 2440588 }] - + set dt $y-$mm-01 set result "" append result $b " " $B " " \ @@ -296,17 +296,17 @@ proc testcases2 { f2 } { puts $f2 "\t-format {%b %B %c %Ec %C %EC %d %Od %e %Oe %h %j %J %m %Om %N %x %Ex %y %Oy %Y} \\" puts $f2 "\t-gmt true -locale en_US_roman" puts $f2 "} {$result}" - + set hm1 [expr { $hath - 1 }] incr s [expr { 86400 * ( $hath - 1 ) }] incr yd $hm1 - + set dd [format %02d $hath] set ee [format %2d $hath] set j [format %03d $yd] - + set J [expr { ( $s / 86400 ) + 2440588 }] - + set dt $y-$mm-$dd set result "" append result $b " " $B " " \ @@ -332,7 +332,7 @@ proc testcases2 { f2 } { puts $f2 "\t-format {%b %B %c %Ec %C %EC %d %Od %e %Oe %h %j %J %m %Om %N %x %Ex %y %Oy %Y} \\" puts $f2 "\t-gmt true -locale en_US_roman" puts $f2 "} {$result}" - + incr s 86400 incr yd } @@ -451,7 +451,7 @@ proc testcases3 { f2 } { testISO $f2 $ym1 52 1 [expr { $secs - 5*86400 }] testISO $f2 $ym1 52 6 $secs testISO $f2 $ym1 52 7 [expr { $secs + 86400 }] - } + } testISO $f2 $y 1 1 [expr { $secs + 2*86400 }] testISO $f2 $y 1 6 [expr { $secs + 7*86400 }] testISO $f2 $y 1 7 [expr { $secs + 8*86400 }] @@ -466,10 +466,10 @@ proc testcases3 { f2 } { proc testISO { f2 G V u secs } { upvar 1 case case - + set longdays {Sunday Monday Tuesday Wednesday Thursday Friday Saturday Sunday} set shortdays {Sun Mon Tue Wed Thu Fri Sat Sun} - + puts $f2 "test clock-3.[incr case] {ISO week-based calendar [format %04d-W%02d-%d $G $V $u]} {" puts $f2 " clock format $secs -format {%a %A %g %G %u %U %V %w %W} -gmt true; \# $G-W[format %02d $V]-$u" puts $f2 "} {[lindex $shortdays $u] [lindex $longdays $u]\ @@ -478,7 +478,7 @@ proc testISO { f2 G V u secs } { [clock format $secs -format %U -gmt true]\ [format %02d $V] [expr { $u % 7 }]\ [clock format $secs -format %W -gmt true]}" - + } #---------------------------------------------------------------------- @@ -504,15 +504,15 @@ proc testcases4 { f2 } { puts $f2 "\# Test formatting of time of day" puts $f2 "\# Format groups tested: %H %OH %I %OI %k %Ok %l %Ol %M %OM %p %P %r %R %S %OS %T %X %EX %+" puts $f2 {} - + set i 0 set fmt "%H %OH %I %OI %k %Ok %l %Ol %M %OM %p %P %r %R %S %OS %T %X %EX %+" - foreach { h romanH I romanI am } { - 0 ? 12 xii AM - 1 i 1 i AM - 11 xi 11 xi AM - 12 xii 12 xii PM - 13 xiii 1 i PM + foreach { h romanH I romanI am } { + 0 ? 12 xii AM + 1 i 1 i AM + 11 xi 11 xi AM + 12 xii 12 xii PM + 13 xiii 1 i PM 23 xxiii 11 xi PM } { set hh [format %02d $h] @@ -547,7 +547,7 @@ proc testcases4 { f2 } { puts "testcases4: $i test cases." } - + #---------------------------------------------------------------------- # # testcases5 -- @@ -572,9 +572,9 @@ proc testcases5 { f2 } { puts $f2 {} puts $f2 "\# Test formatting of Daylight Saving Time" puts $f2 {} - + set fmt {%H:%M:%S %z %Z} - + set i 0 puts $f2 "test clock-5.[incr i] {does Detroit exist} {" puts $f2 " clock format 0 -format {} -timezone :America/Detroit" @@ -587,7 +587,7 @@ proc testcases5 { f2 } { puts $f2 " concat {ok}" puts $f2 " }" puts $f2 "} ok" - + foreach row $TZData(:America/Detroit) { foreach { t offset isdst tzname } $row break if { $t > -4000000000000 } { @@ -648,12 +648,12 @@ proc testcases5 { f2 } { proc testcases8 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of ccyymmdd" puts $f2 "" - - set n 0 + + set n 0 foreach year {1970 1971 2000 2001} { foreach month {01 12} { foreach day {02 31} { @@ -670,7 +670,7 @@ proc testcases8 { f2 } { puts $f2 "} $scanned" } } - } + } foreach fmt {%x %D} { set string [clock format $scanned \ -format $fmt \ @@ -708,11 +708,11 @@ proc testcases8 { f2 } { proc testcases11 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test precedence among yyyymmdd and yyyyddd" puts $f2 "" - + array set v { Y 1970 m 01 @@ -771,12 +771,12 @@ proc testcases11 { f2 } { proc testcases12 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of ccyyWwwd" puts $f2 "" - - set n 0 + + set n 0 foreach year {1970 1971 2000 2001} { foreach month {01 12} { foreach day {02 31} { @@ -817,12 +817,12 @@ proc testcases12 { f2 } { proc testcases14 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of yymmdd" puts $f2 "" - - set n 0 + + set n 0 foreach year {1938 1970 2000 2037} { foreach month {01 12} { foreach day {02 31} { @@ -839,7 +839,7 @@ proc testcases14 { f2 } { puts $f2 "} $scanned" } } - } + } } } } @@ -868,12 +868,12 @@ proc testcases14 { f2 } { proc testcases17 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of yyWwwd" puts $f2 "" - - set n 0 + + set n 0 foreach year {1970 1971 2000 2001} { foreach month {01 12} { foreach day {02 31} { @@ -914,12 +914,12 @@ proc testcases17 { f2 } { proc testcases19 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of mmdd" puts $f2 "" - - set n 0 + + set n 0 foreach year {1938 1970 2000 2037} { set base [clock scan ${year}0101 -gmt true] foreach month {01 12} { @@ -935,7 +935,7 @@ proc testcases19 { f2 } { puts $f2 " [list clock scan $string -format [list $mm $dd] -locale en_US_roman -base $base -gmt 1]" puts $f2 "} $scanned" } - } + } } } } @@ -964,12 +964,12 @@ proc testcases19 { f2 } { proc testcases22 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of Wwwd" puts $f2 "" - - set n 0 + + set n 0 foreach year {1970 1971 2000 2001} { set base [clock scan ${year}0104 -gmt true] foreach month {03 10} { @@ -1011,12 +1011,12 @@ proc testcases22 { f2 } { proc testcases24 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of naked day-of-month" puts $f2 "" - - set n 0 + + set n 0 foreach year {1970 2000} { foreach month {01 12} { set base [clock scan ${year}${month}01 -gmt true] @@ -1030,7 +1030,7 @@ proc testcases24 { f2 } { puts $f2 "test clock-24.[incr n] {parse naked day of month} {" puts $f2 " [list clock scan $string -format $dd -locale en_US_roman -base $base -gmt 1]" puts $f2 "} $scanned" - } + } } } } @@ -1059,12 +1059,12 @@ proc testcases24 { f2 } { proc testcases26 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of naked day of week" puts $f2 "" - - set n 0 + + set n 0 foreach year {1970 2001} { foreach week {01 52} { set base [clock scan ${year}W${week}4 \ @@ -1108,7 +1108,7 @@ proc testcases26 { f2 } { proc testcases29 { f2 } { # Put out a header describing the tests - + puts $f2 "" puts $f2 "\# Test parsing of time of day" puts $f2 "" @@ -1172,7 +1172,7 @@ proc testcases29 { f2 } { } } } - + } puts "testcases29: $n test cases" } diff --git a/tools/man2help.tcl b/tools/man2help.tcl index 018fa84..ca29226 100644 --- a/tools/man2help.tcl +++ b/tools/man2help.tcl @@ -36,7 +36,7 @@ proc generateContents {basename version files} { set lastTopic {} foreach topic [getTopics $package $section] { if {[string compare $lastTopic $topic]} { - set id $topics($package,$section,$topic) + set id $topics($package,$section,$topic) puts $fd "2 $topic=$id" set lastTopic $topic } diff --git a/tools/man2html.tcl b/tools/man2html.tcl index 124f631..13d6916 100644 --- a/tools/man2html.tcl +++ b/tools/man2html.tcl @@ -27,8 +27,8 @@ proc sarray {file args} { if {![array exists array]} { puts "sarray: \"$a\" isn't an array" break - } - + } + foreach name [lsort [array names array]] { regsub -all " " $name "\\ " name1 puts $file "set ${a}($name1) \{$array($name)\}" @@ -141,12 +141,12 @@ proc main {argv} { foreach package $packages { file mkdir $html_dir/$package - + # build hyperlink database arrays: NAME_file and KEY_file # puts "\nScanning man pages in $tcl_dir/$package/doc..." uplevel \#0 [list source $homeDir/man2html1.tcl] - + doDir $tcl_dir/$package/doc # clean up the NAME_file and KEY_file database arrays diff --git a/tools/man2html1.tcl b/tools/man2html1.tcl index 23a9e58..65ff106 100644 --- a/tools/man2html1.tcl +++ b/tools/man2html1.tcl @@ -10,7 +10,7 @@ package require Tcl 8.4- # Global variables used by these scripts: # # state - state variable that controls action of text proc. -# +# # curFile - tail of current man page. # # file - file pointer; for both xref.tcl and contents.html @@ -23,7 +23,7 @@ package require Tcl 8.4- # # lib - contains package name. Used to label section in contents.html # -# inDT - in dictionary term. +# inDT - in dictionary term. # text -- @@ -32,7 +32,7 @@ package require Tcl 8.4- # and KEY_file. # # DT: might do this: if first word of $dt matches $name and [llength $name==1] -# and [llength $dt > 1], then add to NAME_file. +# and [llength $dt > 1], then add to NAME_file. # # Arguments: # string - Text to index. @@ -86,7 +86,7 @@ proc macro {name args} { KEYWORDS {set state KEY} default {set state OFF} } - + } TP { global inDT @@ -138,7 +138,7 @@ proc newline {} { # initGlobals, tab, font, char, macro2 -- # -# These procedures do nothing during the first pass. +# These procedures do nothing during the first pass. # # Arguments: # None. @@ -214,9 +214,9 @@ proc doListing {file pattern} { proc doContents {file packageName} { global footer - + set file [open $file w] - + puts $file "<HTML><HEAD><TITLE>$packageName Manual</TITLE></HEAD><BODY>" puts $file "<H3>$packageName</H3>" doListing $file "*.1" @@ -237,8 +237,8 @@ proc doContents {file packageName} { # # This is the toplevel procedure that searches a man page # for hypertext links. It builds a data base consisting of -# two arrays: NAME_file and KEY file. It runs the man2tcl -# program to turn the man page into a script, then it evals +# two arrays: NAME_file and KEY file. It runs the man2tcl +# program to turn the man page into a script, then it evals # that script. # # Arguments: diff --git a/tools/mkdepend.tcl b/tools/mkdepend.tcl index de5fdba..ecb2206 100644 --- a/tools/mkdepend.tcl +++ b/tools/mkdepend.tcl @@ -10,20 +10,20 @@ # above copyright notice and the following two paragraphs appear in # all copies of this software. # -# IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, -# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF -# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR HAS BEEN ADVISED +# IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF +# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE AUTHOR HAS BEEN ADVISED # OF THE POSSIBILITY OF SUCH DAMAGE. # -# THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -# PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" +# THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" # BASIS, AND THE AUTHOR HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, # UPDATES, ENHANCEMENTS, OR MODIFICATIONS. #============================================================================== # # Modified heavily by David Gravereaux <davygrvy@pobox.com> about 9/17/2006. -# Original can be found @ +# Original can be found @ # http://web.archive.org/web/20070616205924/http://www.doc.ic.ac.uk/~np2/software/mkdepend.html #============================================================================== diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl index e33b3c7..8125790 100644 --- a/tools/uniParse.tcl +++ b/tools/uniParse.tcl @@ -396,7 +396,11 @@ enum { * Unicode character tables. */ -#define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#if TCL_UTF_MAX > 3 +# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0x1fffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#else +# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#endif " close $f diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 2cb0027..7ffbf8d 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -155,6 +155,11 @@ TclpFindExecutable( goto done; } + if (TclpGetCwd(NULL, &cwd) == NULL) { + TclSetObjNameOfExecutable(Tcl_NewObj(), NULL); + goto done; + } + /* * The name is relative to the current working directory. First strip off * a leading "./", if any, then add the full path name of the current @@ -168,8 +173,6 @@ TclpFindExecutable( Tcl_DStringInit(&nameString); Tcl_DStringAppend(&nameString, name, -1); - TclpGetCwd(NULL, &cwd); - Tcl_DStringFree(&buffer); Tcl_UtfToExternalDString(NULL, Tcl_DStringValue(&cwd), Tcl_DStringLength(&cwd), &buffer); diff --git a/win/configure b/win/configure index d9a6234..f8ca075 100755 --- a/win/configure +++ b/win/configure @@ -3757,6 +3757,13 @@ echo "${ECHO_T}using shared flags" >&6 # Add SHLIB_LD_LIBS to the Make rule, not here. LIBRARIES="\${SHARED_LIBRARIES}" EXESUFFIX="\${DBGX}.exe" + case "x`echo \${VisualStudioVersion}`" in + x14*) + lflags="${lflags} -nodefaultlib:libucrt.lib" + ;; + *) + ;; + esac fi MAKE_DLL="\${SHLIB_LD} \$(LDFLAGS) -out:\$@" # DLLSUFFIX is separate because it is the building block for @@ -3797,6 +3804,15 @@ echo "${ECHO_T} Using 64-bit $MACHINE mode" >&6 fi LIBS="netapi32.lib kernel32.lib user32.lib advapi32.lib ws2_32.lib" + + case "x`echo \${VisualStudioVersion}`" in + x14*) + LIBS="$LIBS ucrt.lib" + ;; + *) + ;; + esac + if test "$do64bit" != "no" ; then # The space-based-path will work for the Makefile, but will # not work if AC_TRY_COMPILE is called. TEA has the @@ -3871,7 +3887,7 @@ fi CFLAGS_DEBUG="-nologo -Zi -Od ${runtime}d" # Do not use -O2 for Win64 - this has proved buggy in code gen. CFLAGS_OPTIMIZE="-nologo -O1 ${runtime}" - lflags="-nologo -MACHINE:${MACHINE} -LIBPATH:\"${MSSDK}/Lib/${MACHINE}\"" + lflags="${lflags} -nologo -MACHINE:${MACHINE} -LIBPATH:\"${MSSDK}/Lib/${MACHINE}\"" LINKBIN="\"${PATH64}/link.exe\"" # Avoid 'unresolved external symbol __security_cookie' errors. # c.f. http://support.microsoft.com/?id=894573 @@ -3883,7 +3899,7 @@ fi CFLAGS_DEBUG="-nologo -Z7 -Od -WX ${runtime}d" # -O2 - create fast code (/Og /Oi /Ot /Oy /Ob2 /Gs /GF /Gy) CFLAGS_OPTIMIZE="-nologo -O2 ${runtime}" - lflags="-nologo" + lflags="${lflags} -nologo" LINKBIN="link" fi diff --git a/win/makefile.vc b/win/makefile.vc index 616ced7..ae277f7 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -529,6 +529,10 @@ lflags = -nologo -machine:$(MACHINE) $(LINKERFLAGS) $(ldebug) lflags = $(lflags) -profile
!endif
+!if $(MSVCRT) && !($(DEBUG) && !$(UNCHECKED)) && $(VCVERSION) >= 1900
+lflags = $(lflags) -nodefaultlib:libucrt.lib
+!endif
+
!if $(ALIGN98_HACK) && !$(STATIC_BUILD)
### Align sections for PE size savings.
lflags = $(lflags) -opt:nowin98
@@ -553,6 +557,9 @@ baselibs = netapi32.lib kernel32.lib user32.lib advapi32.lib ws2_32.lib baselibs = $(baselibs) bufferoverflowU.lib
!endif
!endif
+!if $(MSVCRT) && !($(DEBUG) && !$(UNCHECKED)) && $(VCVERSION) >= 1900
+baselibs = $(baselibs) ucrt.lib
+!endif
#---------------------------------------------------------------------
# TclTest flags
@@ -791,6 +791,13 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ # Add SHLIB_LD_LIBS to the Make rule, not here. LIBRARIES="\${SHARED_LIBRARIES}" EXESUFFIX="\${DBGX}.exe" + case "x`echo \${VisualStudioVersion}`" in + x14*) + lflags="${lflags} -nodefaultlib:libucrt.lib" + ;; + *) + ;; + esac fi MAKE_DLL="\${SHLIB_LD} \$(LDFLAGS) -out:\[$]@" # DLLSUFFIX is separate because it is the building block for @@ -828,6 +835,15 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ fi LIBS="netapi32.lib kernel32.lib user32.lib advapi32.lib ws2_32.lib" + + case "x`echo \${VisualStudioVersion}`" in + x14*) + LIBS="$LIBS ucrt.lib" + ;; + *) + ;; + esac + if test "$do64bit" != "no" ; then # The space-based-path will work for the Makefile, but will # not work if AC_TRY_COMPILE is called. TEA has the @@ -842,7 +858,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ CFLAGS_DEBUG="-nologo -Zi -Od ${runtime}d" # Do not use -O2 for Win64 - this has proved buggy in code gen. CFLAGS_OPTIMIZE="-nologo -O1 ${runtime}" - lflags="-nologo -MACHINE:${MACHINE} -LIBPATH:\"${MSSDK}/Lib/${MACHINE}\"" + lflags="${lflags} -nologo -MACHINE:${MACHINE} -LIBPATH:\"${MSSDK}/Lib/${MACHINE}\"" LINKBIN="\"${PATH64}/link.exe\"" # Avoid 'unresolved external symbol __security_cookie' errors. # c.f. http://support.microsoft.com/?id=894573 @@ -854,7 +870,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ CFLAGS_DEBUG="-nologo -Z7 -Od -WX ${runtime}d" # -O2 - create fast code (/Og /Oi /Ot /Oy /Ob2 /Gs /GF /Gy) CFLAGS_OPTIMIZE="-nologo -O2 ${runtime}" - lflags="-nologo" + lflags="${lflags} -nologo" LINKBIN="link" fi diff --git a/win/tclWinFile.c b/win/tclWinFile.c index 6b9d373..02927ad 100644 --- a/win/tclWinFile.c +++ b/win/tclWinFile.c @@ -3001,7 +3001,7 @@ TclNativeCreateNativeRep( } done: - + TclDecrRefCount(validPathPtr); return nativePathPtr; } |
