From 494c2de3a748b449c69ce322a1a741f5a31fd4d5 Mon Sep 17 00:00:00 2001 From: stanton Date: Mon, 21 Sep 1998 23:39:52 +0000 Subject: Added contents of Tcl 8.1a2 --- README | 208 +- changes | 304 +- compat/memcmp.c | 61 + compat/stdlib.h | 4 +- compat/strftime.c | 6 +- compat/string.h | 6 +- doc/Backslash.3 | 30 +- doc/ByteArrObj.3 | 91 + doc/CrtChannel.3 | 46 +- doc/CrtObjCmd.3 | 19 +- doc/Encoding.3 | 464 + doc/Eval.3 | 190 +- doc/EvalObj.3 | 91 - doc/Exit.3 | 46 +- doc/GetIndex.3 | 31 +- doc/ObjSetVar.3 | 162 - doc/Object.3 | 11 +- doc/OpenFileChnl.3 | 325 +- doc/ParseCmd.3 | 426 + doc/SaveResult.3 | 65 + doc/SetVar.3 | 194 +- doc/StringObj.3 | 32 +- doc/Tcl.n | 66 +- doc/Thread.3 | 97 + doc/ToUpper.3 | 90 + doc/TraceVar.3 | 42 +- doc/Translate.3 | 8 +- doc/Utf.3 | 160 + doc/binary.n | 2 +- doc/exec.n | 17 +- doc/fconfigure.n | 194 +- doc/glob.n | 19 +- doc/man.macros | 4 +- doc/pkgMkIndex.n | 39 +- doc/regexp.n | 555 +- doc/resource.n | 4 +- doc/safe.n | 5 +- generic/chr.h | 48 + generic/color.c | 605 + generic/compile.c | 2089 ++++ generic/exec.c | 1753 +++ generic/guts.h | 233 + generic/lex.c | 938 ++ generic/locale.c | 675 ++ generic/nfa.c | 1368 +++ generic/regexp.c | 1333 --- generic/tcl.h | 429 +- generic/tclAlloc.c | 197 +- generic/tclAsync.c | 14 +- generic/tclBasic.c | 1356 +-- generic/tclBinary.c | 827 +- generic/tclCkalloc.c | 190 +- generic/tclClock.c | 47 +- generic/tclCmdAH.c | 1261 ++- generic/tclCmdIL.c | 314 +- generic/tclCmdMZ.c | 1520 +-- generic/tclCompCmds.c | 1964 ++++ generic/tclCompExpr.c | 2666 ++--- generic/tclCompile.c | 9247 ++++------------ generic/tclCompile.h | 502 +- generic/tclDate.c | 24 +- generic/tclEncoding.c | 2502 +++++ generic/tclEnv.c | 295 +- generic/tclEvent.c | 561 +- generic/tclExecute.c | 2027 ++-- generic/tclFCmd.c | 119 +- generic/tclFileName.c | 562 +- generic/tclGet.c | 60 +- generic/tclGetDate.y | 24 +- generic/tclHash.c | 7 +- generic/tclHistory.c | 26 +- generic/tclIO.c | 4720 +++++--- generic/tclIOCmd.c | 746 
+- generic/tclIOSock.c | 29 +- generic/tclIOUtil.c | 114 +- generic/tclIndexObj.c | 160 +- generic/tclInitScript.h | 47 + generic/tclInt.h | 902 +- generic/tclInterp.c | 4836 +++----- generic/tclLink.c | 41 +- generic/tclListObj.c | 17 +- generic/tclLiteral.c | 929 ++ generic/tclLoad.c | 232 +- generic/tclLoadNone.c | 6 +- generic/tclMain.c | 142 +- generic/tclNamesp.c | 120 +- generic/tclNotify.c | 493 +- generic/tclObj.c | 351 +- generic/tclParse.c | 2496 +++-- generic/tclParseExpr.c | 1826 +++ generic/tclPipe.c | 94 +- generic/tclPkg.c | 410 +- generic/tclPort.h | 4 +- generic/tclPosixStr.c | 6 +- generic/tclPreserve.c | 217 +- generic/tclProc.c | 298 +- generic/tclRegexp.c | 794 ++ generic/tclRegexp.h | 274 +- generic/tclResult.c | 955 ++ generic/tclStringObj.c | 49 +- generic/tclTest.c | 1343 ++- generic/tclTestObj.c | 191 +- generic/tclThread.c | 546 + generic/tclThreadTest.c | 878 ++ generic/tclTimer.c | 388 +- generic/tclUtf.c | 1258 +++ generic/tclUtil.c | 1253 +-- generic/tclVar.c | 534 +- library/auto.tcl | 203 + library/encoding/ascii.enc | 20 + library/encoding/big5.enc | 1499 +++ library/encoding/cp1250.enc | 20 + library/encoding/cp1251.enc | 20 + library/encoding/cp1252.enc | 20 + library/encoding/cp1253.enc | 20 + library/encoding/cp1254.enc | 20 + library/encoding/cp1255.enc | 20 + library/encoding/cp1256.enc | 20 + library/encoding/cp1257.enc | 20 + library/encoding/cp1258.enc | 20 + library/encoding/cp437.enc | 20 + library/encoding/cp737.enc | 20 + library/encoding/cp775.enc | 20 + library/encoding/cp850.enc | 20 + library/encoding/cp852.enc | 20 + library/encoding/cp855.enc | 20 + library/encoding/cp857.enc | 20 + library/encoding/cp860.enc | 20 + library/encoding/cp861.enc | 20 + library/encoding/cp862.enc | 20 + library/encoding/cp863.enc | 20 + library/encoding/cp864.enc | 20 + library/encoding/cp865.enc | 20 + library/encoding/cp866.enc | 20 + library/encoding/cp869.enc | 20 + library/encoding/cp874.enc | 20 + library/encoding/cp932.enc | 
785 ++ library/encoding/cp936.enc | 2162 ++++ library/encoding/cp949.enc | 2128 ++++ library/encoding/cp950.enc | 1499 +++ library/encoding/dingbats.enc | 20 + library/encoding/euc-jp.enc | 1346 +++ library/encoding/gb12345.enc | 1414 +++ library/encoding/gb1988.enc | 20 + library/encoding/gb2312.enc | 1380 +++ library/encoding/iso2022-jp.enc | 13 + library/encoding/iso2022-kr.enc | 7 + library/encoding/iso2022.enc | 16 + library/encoding/iso8859-1.enc | 20 + library/encoding/iso8859-2.enc | 20 + library/encoding/iso8859-3.enc | 20 + library/encoding/iso8859-4.enc | 20 + library/encoding/iso8859-5.enc | 20 + library/encoding/iso8859-6.enc | 20 + library/encoding/iso8859-7.enc | 20 + library/encoding/iso8859-8.enc | 20 + library/encoding/iso8859-9.enc | 20 + library/encoding/jis0201.enc | 20 + library/encoding/jis0208.enc | 1312 +++ library/encoding/jis0212.enc | 1159 ++ library/encoding/macCentEuro.enc | 20 + library/encoding/macCroatian.enc | 20 + library/encoding/macCyrillic.enc | 20 + library/encoding/macDingbats.enc | 20 + library/encoding/macGreek.enc | 20 + library/encoding/macIceland.enc | 20 + library/encoding/macJapan.enc | 785 ++ library/encoding/macRoman.enc | 20 + library/encoding/macRomania.enc | 20 + library/encoding/macThai.enc | 20 + library/encoding/macTurkish.enc | 20 + library/encoding/macUkraine.enc | 20 + library/encoding/shiftjis.enc | 683 ++ library/encoding/symbol.enc | 20 + library/history.tcl | 20 +- library/http/http.tcl | 30 +- library/http1.0/http.tcl | 32 +- library/http2.0/http.tcl | 30 +- library/http2.1/http.tcl | 30 +- library/http2.3/http.tcl | 30 +- library/init.tcl | 550 +- library/opt/optparse.tcl | 1103 ++ library/opt/pkgIndex.tcl | 11 + library/opt0.4/optparse.tcl | 1103 ++ library/opt0.4/pkgIndex.tcl | 11 + library/package.tcl | 302 + library/parray.tcl | 4 +- library/safe.tcl | 10 +- library/tclIndex | 14 +- library/word.tcl | 24 +- mac/MW_TclHeader.pch | 70 +- mac/README | 83 +- mac/tclMac.h | 10 +- mac/tclMacAlloc.c | 18 
+- mac/tclMacAppInit.c | 4 +- mac/tclMacBOAAppInit.c | 4 +- mac/tclMacBOAMain.c | 27 +- mac/tclMacChan.c | 143 +- mac/tclMacCommonDefines.h | 90 + mac/tclMacExit.c | 4 +- mac/tclMacFCmd.c | 457 +- mac/tclMacFile.c | 653 +- mac/tclMacInit.c | 585 +- mac/tclMacInt.h | 9 +- mac/tclMacLibrary.r | 6 +- mac/tclMacLoad.c | 61 +- mac/tclMacMSLPrefix.h | 2 +- mac/tclMacNotify.c | 123 +- mac/tclMacOSA.c | 8 +- mac/tclMacPort.h | 275 +- mac/tclMacProjects.sit.hqx | 4081 ++----- mac/tclMacResource.c | 87 +- mac/tclMacResource.r | 6 +- mac/tclMacSock.c | 184 +- mac/tclMacTclCode.r | 36 + mac/tclMacThrd.c | 795 ++ mac/tclMacThrd.h | 20 + mac/tclMacUnix.c | 145 +- mac/tclMacUtil.c | 4 +- tests/all | 22 - tests/append.test | 10 +- tests/assocd.test | 4 +- tests/async.test | 3 +- tests/basic.test | 204 +- tests/binary.test | 6 +- tests/case.test | 4 +- tests/clock.test | 55 +- tests/cmdAH.test | 649 +- tests/cmdIL.test | 16 +- tests/cmdInfo.test | 5 +- tests/cmdMZ.test | 559 + tests/compExpr-old.test | 670 ++ tests/compExpr.test | 323 + tests/compile.test | 28 +- tests/concat.test | 4 +- tests/dcall.test | 4 +- tests/defs | 271 +- tests/dstring.test | 4 +- tests/encoding.test | 227 + tests/env.test | 39 +- tests/error.test | 4 +- tests/eval.test | 4 +- tests/event.test | 4 +- tests/exec.test | 23 +- tests/execute.test | 4 +- tests/expr-old.test | 24 +- tests/fCmd.test | 10 +- tests/fileName.test | 4 +- tests/for-old.test | 4 +- tests/for.test | 140 +- tests/foreach.test | 4 +- tests/format.test | 207 +- tests/get.test | 22 +- tests/history.test | 3 +- tests/http.test | 169 +- tests/httpd | 148 + tests/if-old.test | 4 +- tests/if.test | 514 +- tests/incr-old.test | 4 +- tests/incr.test | 254 +- tests/indexObj.test | 4 +- tests/info.test | 194 +- tests/init.test | 3 +- tests/interp.test | 133 +- tests/io.test | 2535 ++++- tests/ioCmd.test | 23 +- tests/join.test | 3 +- tests/lindex.test | 4 +- tests/link.test | 5 +- tests/linsert.test | 4 +- tests/list.test | 4 +- tests/listObj.test 
| 11 +- tests/llength.test | 4 +- tests/load.test | 12 +- tests/lrange.test | 4 +- tests/lreplace.test | 4 +- tests/lsearch.test | 4 +- tests/macFCmd.test | 4 +- tests/misc.test | 11 +- tests/namespace-old.test | 4 +- tests/namespace.test | 4 +- tests/obj.test | 154 +- tests/opt.test | 29 +- tests/osa.test | 4 +- tests/parse.test | 1191 +- tests/parseExpr.test | 619 ++ tests/parseOld.test | 529 + tests/pid.test | 4 +- tests/pkg.test | 5 +- tests/proc-old.test | 3 +- tests/proc.test | 3 +- tests/regexp.test | 34 +- tests/regexp2.test | 3176 ++++++ tests/regexp3.test | 3295 ++++++ tests/registry.test | 47 +- tests/rename.test | 6 +- tests/result.test | 81 + tests/safe.test | 4 +- tests/scan.test | 21 +- tests/security.test | 36 + tests/set-old.test | 4 +- tests/set.test | 255 +- tests/socket.test | 62 +- tests/source.test | 38 +- tests/stringObj.test | 3 +- tests/subst.test | 6 +- tests/switch.test | 4 +- tests/thread.test | 217 + tests/timer.test | 84 +- tests/trace.test | 8 +- tests/unixFCmd.test | 9 +- tests/unixInit.test | 155 + tests/unixNotfy.test | 7 +- tests/unknown.test | 4 +- tests/uplevel.test | 3 +- tests/upvar.test | 4 +- tests/utf.test | 197 + tests/util.test | 208 +- tests/var.test | 10 +- tests/while-old.test | 4 +- tests/while.test | 294 +- tests/winFCmd.test | 84 +- tests/winFile.test | 51 + tests/winNotify.test | 3 +- tests/winPipe.test | 218 +- tools/Makefile.in | 2 +- tools/configure.in | 2 +- tools/cvtEOL.tcl | 35 + tools/encoding/Makefile | 99 + tools/encoding/README | 5 + tools/encoding/ascii.txt | 95 + tools/encoding/big5.txt | 13797 +++++++++++++++++++++++ tools/encoding/cp1250.txt | 275 + tools/encoding/cp1251.txt | 275 + tools/encoding/cp1252.txt | 275 + tools/encoding/cp1253.txt | 275 + tools/encoding/cp1254.txt | 275 + tools/encoding/cp1255.txt | 275 + tools/encoding/cp1256.txt | 275 + tools/encoding/cp1257.txt | 275 + tools/encoding/cp1258.txt | 275 + tools/encoding/cp437.txt | 274 + tools/encoding/cp737.txt | 274 + 
tools/encoding/cp775.txt | 275 + tools/encoding/cp850.txt | 274 + tools/encoding/cp852.txt | 274 + tools/encoding/cp855.txt | 275 + tools/encoding/cp857.txt | 275 + tools/encoding/cp860.txt | 275 + tools/encoding/cp861.txt | 275 + tools/encoding/cp862.txt | 275 + tools/encoding/cp863.txt | 275 + tools/encoding/cp864.txt | 275 + tools/encoding/cp865.txt | 275 + tools/encoding/cp866.txt | 275 + tools/encoding/cp869.txt | 275 + tools/encoding/cp874.txt | 275 + tools/encoding/cp932.txt | 7999 ++++++++++++++ tools/encoding/cp936.txt | 22066 +++++++++++++++++++++++++++++++++++++ tools/encoding/cp949.txt | 17321 +++++++++++++++++++++++++++++ tools/encoding/cp950.txt | 13777 +++++++++++++++++++++++ tools/encoding/dingbats.txt | 250 + tools/encoding/gb12345.txt | 7604 +++++++++++++ tools/encoding/gb1988.txt | 158 + tools/encoding/gb2312.txt | 7501 +++++++++++++ tools/encoding/iso2022-jp.esc | 10 + tools/encoding/iso2022-kr.esc | 5 + tools/encoding/iso2022.esc | 14 + tools/encoding/iso8859-1.txt | 230 + tools/encoding/iso8859-2.txt | 230 + tools/encoding/iso8859-3.txt | 223 + tools/encoding/iso8859-4.txt | 230 + tools/encoding/iso8859-5.txt | 230 + tools/encoding/iso8859-6.txt | 185 + tools/encoding/iso8859-7.txt | 224 + tools/encoding/iso8859-8.txt | 192 + tools/encoding/iso8859-9.txt | 232 + tools/encoding/jis0201.txt | 202 + tools/encoding/jis0208.txt | 6940 ++++++++++++ tools/encoding/jis0212.txt | 6141 +++++++++++ tools/encoding/macCentEuro.txt | 293 + tools/encoding/macCroatian.txt | 287 + tools/encoding/macCyrillic.txt | 287 + tools/encoding/macDingbats.txt | 260 + tools/encoding/macGreek.txt | 290 + tools/encoding/macIceland.txt | 285 + tools/encoding/macJapan.txt | 7598 +++++++++++++ tools/encoding/macRoman.txt | 301 + tools/encoding/macRomania.txt | 285 + tools/encoding/macThai.txt | 299 + tools/encoding/macTurkish.txt | 289 + tools/encoding/macUkraine.txt | 279 + tools/encoding/shiftjis.txt | 7095 ++++++++++++ tools/encoding/symbol.txt | 265 + 
tools/encoding/txt2enc.c | 244 + tools/index.tcl | 2 +- tools/man2help.tcl | 2 +- tools/man2help2.tcl | 2 +- tools/man2html.tcl | 181 + tools/man2html1.tcl | 269 + tools/man2html2.tcl | 871 ++ tools/man2tcl.c | 4 +- tools/regexpTestLib.tcl | 266 + tools/spencer2regexp.tcl | 174 + tools/spencer2testregexp.tcl | 53 + tools/str2c | 61 + tools/tcl.wse | 2173 ++++ tools/tcl8.1-tk8.1-man-html.tcl | 1662 +++ tools/tclmin.wse | 247 + unix/Makefile.in | 148 +- unix/README | 10 +- unix/configure.in | 51 +- unix/dltest/pkge.c | 5 +- unix/mkLinks | 266 +- unix/tclAppInit.c | 28 +- unix/tclLoadAix.c | 6 +- unix/tclLoadAout.c | 69 +- unix/tclLoadDl.c | 82 +- unix/tclLoadDld.c | 47 +- unix/tclLoadNext.c | 41 +- unix/tclLoadOSF.c | 42 +- unix/tclLoadShl.c | 57 +- unix/tclMtherr.c | 13 +- unix/tclUnixChan.c | 529 +- unix/tclUnixEvent.c | 4 +- unix/tclUnixFCmd.c | 705 +- unix/tclUnixFile.c | 601 +- unix/tclUnixInit.c | 489 +- unix/tclUnixNotfy.c | 592 +- unix/tclUnixPipe.c | 103 +- unix/tclUnixPort.h | 153 +- unix/tclUnixSock.c | 34 +- unix/tclUnixTest.c | 10 +- unix/tclUnixThrd.c | 717 ++ unix/tclUnixThrd.h | 21 + unix/tclUnixTime.c | 5 +- unix/tclXtTest.c | 4 +- win/README | 47 +- win/makefile.bc | 27 +- win/makefile.vc | 53 +- win/pkgIndex.tcl | 4 +- win/tcl.rc | 14 +- win/tclAppInit.c | 46 +- win/tclWin32Dll.c | 537 +- win/tclWinChan.c | 147 +- win/tclWinError.c | 5 +- win/tclWinFCmd.c | 1036 +- win/tclWinFile.c | 934 +- win/tclWinInit.c | 575 +- win/tclWinInt.h | 74 +- win/tclWinLoad.c | 77 +- win/tclWinMtherr.c | 15 +- win/tclWinNotify.c | 224 +- win/tclWinPipe.c | 1465 ++- win/tclWinPort.h | 391 +- win/tclWinReg.c | 57 +- win/tclWinSock.c | 559 +- win/tclWinTest.c | 5 +- win/tclWinThrd.c | 724 ++ win/tclWinThrd.h | 21 + win/tclWinTime.c | 116 +- win/tclsh.rc | 10 +- 462 files changed, 229214 insertions(+), 35060 deletions(-) create mode 100644 compat/memcmp.c create mode 100644 doc/ByteArrObj.3 create mode 100644 doc/Encoding.3 delete mode 100644 doc/EvalObj.3 delete mode 
100644 doc/ObjSetVar.3 create mode 100644 doc/ParseCmd.3 create mode 100644 doc/SaveResult.3 create mode 100644 doc/Thread.3 create mode 100644 doc/ToUpper.3 create mode 100644 doc/Utf.3 create mode 100644 generic/chr.h create mode 100644 generic/color.c create mode 100644 generic/compile.c create mode 100644 generic/exec.c create mode 100644 generic/guts.h create mode 100644 generic/lex.c create mode 100644 generic/locale.c create mode 100644 generic/nfa.c delete mode 100644 generic/regexp.c create mode 100644 generic/tclCompCmds.c create mode 100644 generic/tclEncoding.c create mode 100644 generic/tclInitScript.h create mode 100644 generic/tclLiteral.c create mode 100644 generic/tclParseExpr.c create mode 100644 generic/tclRegexp.c create mode 100644 generic/tclResult.c create mode 100644 generic/tclThread.c create mode 100644 generic/tclThreadTest.c create mode 100644 generic/tclUtf.c create mode 100644 library/auto.tcl create mode 100644 library/encoding/ascii.enc create mode 100644 library/encoding/big5.enc create mode 100644 library/encoding/cp1250.enc create mode 100644 library/encoding/cp1251.enc create mode 100644 library/encoding/cp1252.enc create mode 100644 library/encoding/cp1253.enc create mode 100644 library/encoding/cp1254.enc create mode 100644 library/encoding/cp1255.enc create mode 100644 library/encoding/cp1256.enc create mode 100644 library/encoding/cp1257.enc create mode 100644 library/encoding/cp1258.enc create mode 100644 library/encoding/cp437.enc create mode 100644 library/encoding/cp737.enc create mode 100644 library/encoding/cp775.enc create mode 100644 library/encoding/cp850.enc create mode 100644 library/encoding/cp852.enc create mode 100644 library/encoding/cp855.enc create mode 100644 library/encoding/cp857.enc create mode 100644 library/encoding/cp860.enc create mode 100644 library/encoding/cp861.enc create mode 100644 library/encoding/cp862.enc create mode 100644 library/encoding/cp863.enc create mode 100644 
library/encoding/cp864.enc create mode 100644 library/encoding/cp865.enc create mode 100644 library/encoding/cp866.enc create mode 100644 library/encoding/cp869.enc create mode 100644 library/encoding/cp874.enc create mode 100644 library/encoding/cp932.enc create mode 100644 library/encoding/cp936.enc create mode 100644 library/encoding/cp949.enc create mode 100644 library/encoding/cp950.enc create mode 100644 library/encoding/dingbats.enc create mode 100644 library/encoding/euc-jp.enc create mode 100644 library/encoding/gb12345.enc create mode 100644 library/encoding/gb1988.enc create mode 100644 library/encoding/gb2312.enc create mode 100644 library/encoding/iso2022-jp.enc create mode 100644 library/encoding/iso2022-kr.enc create mode 100644 library/encoding/iso2022.enc create mode 100644 library/encoding/iso8859-1.enc create mode 100644 library/encoding/iso8859-2.enc create mode 100644 library/encoding/iso8859-3.enc create mode 100644 library/encoding/iso8859-4.enc create mode 100644 library/encoding/iso8859-5.enc create mode 100644 library/encoding/iso8859-6.enc create mode 100644 library/encoding/iso8859-7.enc create mode 100644 library/encoding/iso8859-8.enc create mode 100644 library/encoding/iso8859-9.enc create mode 100644 library/encoding/jis0201.enc create mode 100644 library/encoding/jis0208.enc create mode 100644 library/encoding/jis0212.enc create mode 100644 library/encoding/macCentEuro.enc create mode 100644 library/encoding/macCroatian.enc create mode 100644 library/encoding/macCyrillic.enc create mode 100644 library/encoding/macDingbats.enc create mode 100644 library/encoding/macGreek.enc create mode 100644 library/encoding/macIceland.enc create mode 100644 library/encoding/macJapan.enc create mode 100644 library/encoding/macRoman.enc create mode 100644 library/encoding/macRomania.enc create mode 100644 library/encoding/macThai.enc create mode 100644 library/encoding/macTurkish.enc create mode 100644 library/encoding/macUkraine.enc create mode 
100644 library/encoding/shiftjis.enc create mode 100644 library/encoding/symbol.enc create mode 100644 library/opt/optparse.tcl create mode 100644 library/opt/pkgIndex.tcl create mode 100644 library/opt0.4/optparse.tcl create mode 100644 library/opt0.4/pkgIndex.tcl create mode 100644 library/package.tcl create mode 100644 mac/tclMacCommonDefines.h create mode 100644 mac/tclMacTclCode.r create mode 100644 mac/tclMacThrd.c create mode 100644 mac/tclMacThrd.h delete mode 100644 tests/all create mode 100644 tests/cmdMZ.test create mode 100644 tests/compExpr-old.test create mode 100644 tests/compExpr.test create mode 100644 tests/encoding.test create mode 100644 tests/httpd create mode 100644 tests/parseExpr.test create mode 100644 tests/parseOld.test create mode 100644 tests/regexp2.test create mode 100644 tests/regexp3.test create mode 100644 tests/result.test create mode 100644 tests/security.test create mode 100644 tests/thread.test create mode 100644 tests/unixInit.test create mode 100644 tests/utf.test create mode 100644 tests/winFile.test create mode 100644 tools/cvtEOL.tcl create mode 100644 tools/encoding/Makefile create mode 100644 tools/encoding/README create mode 100644 tools/encoding/ascii.txt create mode 100644 tools/encoding/big5.txt create mode 100644 tools/encoding/cp1250.txt create mode 100644 tools/encoding/cp1251.txt create mode 100644 tools/encoding/cp1252.txt create mode 100644 tools/encoding/cp1253.txt create mode 100644 tools/encoding/cp1254.txt create mode 100644 tools/encoding/cp1255.txt create mode 100644 tools/encoding/cp1256.txt create mode 100644 tools/encoding/cp1257.txt create mode 100644 tools/encoding/cp1258.txt create mode 100644 tools/encoding/cp437.txt create mode 100644 tools/encoding/cp737.txt create mode 100644 tools/encoding/cp775.txt create mode 100644 tools/encoding/cp850.txt create mode 100644 tools/encoding/cp852.txt create mode 100644 tools/encoding/cp855.txt create mode 100644 tools/encoding/cp857.txt create mode 100644 
tools/encoding/cp860.txt create mode 100644 tools/encoding/cp861.txt create mode 100644 tools/encoding/cp862.txt create mode 100644 tools/encoding/cp863.txt create mode 100644 tools/encoding/cp864.txt create mode 100644 tools/encoding/cp865.txt create mode 100644 tools/encoding/cp866.txt create mode 100644 tools/encoding/cp869.txt create mode 100644 tools/encoding/cp874.txt create mode 100644 tools/encoding/cp932.txt create mode 100644 tools/encoding/cp936.txt create mode 100644 tools/encoding/cp949.txt create mode 100644 tools/encoding/cp950.txt create mode 100644 tools/encoding/dingbats.txt create mode 100644 tools/encoding/gb12345.txt create mode 100644 tools/encoding/gb1988.txt create mode 100644 tools/encoding/gb2312.txt create mode 100644 tools/encoding/iso2022-jp.esc create mode 100644 tools/encoding/iso2022-kr.esc create mode 100644 tools/encoding/iso2022.esc create mode 100644 tools/encoding/iso8859-1.txt create mode 100644 tools/encoding/iso8859-2.txt create mode 100644 tools/encoding/iso8859-3.txt create mode 100644 tools/encoding/iso8859-4.txt create mode 100644 tools/encoding/iso8859-5.txt create mode 100644 tools/encoding/iso8859-6.txt create mode 100644 tools/encoding/iso8859-7.txt create mode 100644 tools/encoding/iso8859-8.txt create mode 100644 tools/encoding/iso8859-9.txt create mode 100644 tools/encoding/jis0201.txt create mode 100644 tools/encoding/jis0208.txt create mode 100644 tools/encoding/jis0212.txt create mode 100644 tools/encoding/macCentEuro.txt create mode 100644 tools/encoding/macCroatian.txt create mode 100644 tools/encoding/macCyrillic.txt create mode 100644 tools/encoding/macDingbats.txt create mode 100644 tools/encoding/macGreek.txt create mode 100644 tools/encoding/macIceland.txt create mode 100644 tools/encoding/macJapan.txt create mode 100644 tools/encoding/macRoman.txt create mode 100644 tools/encoding/macRomania.txt create mode 100644 tools/encoding/macThai.txt create mode 100644 tools/encoding/macTurkish.txt create mode 
100644 tools/encoding/macUkraine.txt create mode 100644 tools/encoding/shiftjis.txt create mode 100644 tools/encoding/symbol.txt create mode 100644 tools/encoding/txt2enc.c create mode 100644 tools/man2html.tcl create mode 100644 tools/man2html1.tcl create mode 100644 tools/man2html2.tcl create mode 100644 tools/regexpTestLib.tcl create mode 100644 tools/spencer2regexp.tcl create mode 100644 tools/spencer2testregexp.tcl create mode 100644 tools/str2c create mode 100644 tools/tcl.wse create mode 100644 tools/tcl8.1-tk8.1-man-html.tcl create mode 100644 tools/tclmin.wse create mode 100644 unix/tclUnixThrd.c create mode 100644 unix/tclUnixThrd.h create mode 100644 win/tclWinThrd.c create mode 100644 win/tclWinThrd.h diff --git a/README b/README index 13eed9c..d68615c 100644 --- a/README +++ b/README @@ -1,23 +1,28 @@ Tcl -SCCS: @(#) README 1.52 97/11/20 12:43:16 +SCCS: @(#) README 1.57 98/02/18 16:11:20 1. Introduction --------------- This directory and its descendants contain the sources and documentation for Tcl, an embeddable scripting language. The information here -corresponds to release 8.0p2, which is the second patch update for Tcl -8.0. Tcl 8.0 is a major new release that replaces the core of the -interpreter with an on-the-fly bytecode compiler to improve execution -speed. It also includes several other new features such as namespaces -and binary I/O, plus many bug fixes. The compiler introduces a few -incompatibilities that may affect existing Tcl scripts; the -incompatibilities are relatively obscure but may require modifications -to some old scripts before they can run with this version. The compiler -introduces many new C-level APIs, but the old APIs are still supported. -See below for more details. This patch release fixes various bugs in -Tcl 8.0; there are no feature changes relative to Tcl 8.0. +constitutes the 8.1a2 release, which is the second alpha release for Tcl +8.1. 
This release is still in experimental form and is intended for expert +early adopters who are willing to help us find and fix problems. The +release is certain to contain bugs and is not yet feature-complete: we +will probably add new features or change some of the existing features +before the final 8.1 release. Please let us know about any problems you +uncover. + +Tcl 8.1 includes three major new features: Unicode support (all +internal strings are now stored in UTF-8 form), a new regular +expression matcher with most of the Perl features, and support for +multithreading. For details on features, incompatibilities, and +potential problems with this release, see the Tcl/Tk 8.1 Web page at +http://sunscript.sun.com/TclTkCore/8.1.html or refer to the "changes" +file in this directory, which contains a historical record of all +changes to Tcl. 2. Documentation ---------------- @@ -67,14 +72,12 @@ or Win 3.1 with Win32s). Before trying to compile Tcl you should do the following things: (a) Check for a binary release. Pre-compiled binary releases are - available now for PCs, Macintoshes, and several flavors of UNIX. - Binary releases are much easier to install than source releases. - To find out whether a binary release is available for your - platform, check the home page for SunScript - (http://sunscript.sun.com) under "Tech Corner". Also, check in - the FTP directory from which you retrieved the base - distribution. Some of the binary releases are available freely, - while others are for sale. + available now for PCs and Macintoshes. Binary releases are much + easier to install than source releases. To find out whether a + binary release is available for your platform, check the Tcl/Tk + 8.1 Web page at http://sunscript.sun.com/TclTkCore/8.1.html. + Also, check in the FTP directory from which you retrieved the + base distribution. (b) Make sure you have the most recent patch release. 
Look in the FTP directory from which you retrieved this distribution to see @@ -82,30 +85,30 @@ Before trying to compile Tcl you should do the following things: without changing any features, so you should normally use the latest patch release for the version of Tcl that you want. Patch releases are available in two forms. A file like - tcl8.0p2.tar.Z is a complete release for patch level 2 of Tcl - version 8.0. If there is a file with a higher patch level than + tcl8.1p2.tar.Z is a complete release for patch level 2 of Tcl + version 8.1. If there is a file with a higher patch level than this release, just fetch the file with the highest patch level and use it. Patches are also available in the form of patch files that just contain the changes from one patch level to another. These - files will have names like tcl8.0p1.patch, tcl8.0p2.patch, etc. They + files will have names like tcl8.1p1.patch, tcl8.1p2.patch, etc. They may also have .gz or .Z extensions to indicate compression. To use one of these files, you apply it to an existing release with the "patch" program. Patches must be applied in order: - tcl8.0p1.patch must be applied to an unpatched Tcl 8.0 release - to produce a Tcl 8.0p1 release; tcl8.0p2.patch can then be - applied to Tcl8.0p1 to produce Tcl 8.0p2, and so on. To apply an - uncompressed patch file such as tcl8.0p1.patch, invoke a shell + tcl8.1p1.patch must be applied to an unpatched Tcl 8.1 release + to produce a Tcl 8.1p1 release; tcl8.1p2.patch can then be + applied to Tcl8.1p1 to produce Tcl 8.1p2, and so on. To apply an + uncompressed patch file such as tcl8.1p1.patch, invoke a shell command like the following from the directory containing this file: - patch -p < tcl8.0p1.patch + patch -p < tcl8.1p1.patch If the patch file has a .gz extension, invoke a command like the following: - gunzip -c tcl8.0p1.patch.gz | patch -p + gunzip -c tcl8.1p1.patch.gz | patch -p If the patch file has a .Z extension, it was compressed with compress. 
To apply it, invoke a command like the following: - zcat tcl8.0p1.patch.Z | patch -p + zcat tcl8.1p1.patch.Z | patch -p If you're applying a patch to a release that has already been compiled, then before applying the patch you should cd to the "unix" subdirectory and type "make distclean" to restore the @@ -117,138 +120,7 @@ compiling under UNIX, "win" if you're compiling under Windows, or in the README file in that directory for compiling Tcl, installing it, and running the test suite. -4. Summary of changes in Tcl 8.0 --------------------------------- - -Here are the most significant changes in Tcl 8.0. In addition to these -changes, there are several smaller changes and bug fixes. See the file -"changes" for a complete list of all changes. - - 1. Bytecode compiler. The core of the Tcl interpreter has been - replaced with an on-the-fly compiler that translates Tcl scripts to - byte codes; a new interpreter then executes the byte codes. In - earlier versions of Tcl, strings were used as a universal - representation; in Tcl 8.0 strings are replaced with Tcl_Obj - structures ("objects") that can hold both a string value and an - internal form such as a binary integer or compiled bytecodes. The - new objects make it possible to store information in efficient - internal forms and avoid the constant translations to and from - strings that occurred with the old interpreter. We have not yet - converted all of Tcl to take full advantage of the compiler and - objects and have not converted any of Tk yet, but even so you - should see speedups of 2-3x on many programs and you may see - speedups as much as 10-20x in some cases (such as code that - manipulates long lists). Future releases should achieve even - greater speedups. The compiler introduces only a few minor changes - at the level of Tcl scripts, but it introduces many new C APIs for - managing objects. See, for example, the manual entries doc/*Obj*.3. - - 2. Namespaces. 
There is a new namespace mechanism based on the - namespace implementation by Michael McLennan of Lucent Technologies. - This includes new "namespace" and "variable" commands. There are - many new C APIs associated with namespaces, but they will not be - exported until Tcl 8.1. Note: the syntax of the namespace command - has been changed slightly since the b1 release. See the changes - file for details. - - 3. Binary I/O. The new object system in Tcl 8.0 supports binary - strings (internally, strings are counted in addition to being null - terminated). There is a new "binary" command for inserting and - extracting data to/from binary strings. Commands such as "puts", - "gets", and "read" commands now operate correctly on binary data. - There is a new variable tcl_platform(byteOrder) to identify the - native byte order for the current host. - - 4. Random numbers. The "expr" command now contains a random number - generator, which can be accessed via the "rand()" and "srand()" math - functions. - - 5. Safe-Tcl enhancements. There is a new "hidden command" - mechanism, implemented with the Tcl commands "interp hide", "interp - expose", "interp invokehidden", and "interp hidden" and the C APIs - Tcl_HideCommand and Tcl_ExposeCommand. There is now support for - safe packages and extension loading, including new library - procedures such as safe::interpCreate (see the manual entry safe.n - for details). - - 6. There is a new package "registry" available under Windows for - accessing the Windows registry. - - 7. There is a new command "file attributes" for getting and setting - things like permissions and owner. There is also a new command - "file nativename" for getting back the platform-specific name for a - particular file. - - 8. There is a new "fcopy" command to copy data between channels. - This replaces and improves upon the not-so-secret unsupported old - command "unsupported0". - - 9. 
There is a new package "http" for doing GET, POST, and HEAD - requests via the HTTP/1.0 protocol. See the manual entry http.n - for details. - - 10. There are new library procedures for finding word breaks in - strings. See the manual entry library.n for details. - - 11. There are new C APIs Tcl_Finalize (for cleaning up before - unloading the Tcl DLL) and Tcl_Ungets for pushing bytes back into a - channel's input buffer. - - 12. Tcl now supports serial I/O devices on Windows and Unix, with a - new fconfigure -mode option. The Windows driver does not yet - support event-driven I/O. - - 13. The lsort command has new options -dictionary and -index. The - -index option allows for very rapid sorting based on an element - of a list. - - 14. The event notifier has been completely rewritten (again). It - should now allow Tcl to use an external event loop (like Motif's) - when it is embedded in other applications. No script-level - interfaces have changed, but many of the C APIs have. - -Tcl 8.0 introduces the following incompatibilities that may affect Tcl -scripts that worked under Tcl 7.6 and earlier releases: - - 1. Variable and command names may not include the character sequence - "::" anymore: this sequence is now used as a namespace separator. - - 2. The semantics of some Tcl commands have been changed slightly to - maximize performance under the compiler. These incompatibilities - are documented on the Web so that we can keep the list up-to-date. - See the URL http://www.sunlabs.com/research/tcl/compiler.html. - - 3. 2-digit years are now parsed differently by the "clock" command - to handle year 2000 issues better (years 00-38 are treated as - 2000-2038 instead of 1900-1938). - - 4. The old Macintosh commands "cp", "mkdir", "mv", "rm", and "rmdir" - are no longer supported; all of these features are now available on - all platforms via the "file" command. - - 5. 
The variable tcl_precision is now shared between interpreters - and defaults to 12 digits instead of 6; safe interpreters cannot - modify tcl_precision. The new object system in Tcl 8.0 causes - floating-to-string conversions (and the associated rounding) to - occur much less often than in Tcl 7.6, which can sometimes cause - behavioral changes. - - 6. The C APIs associated with the notifier have changed substantially. - - 7. The procedures Tcl_CreateModalTimeout and Tcl_DeleteModalTimeout - have been removed. - - 8. Tcl_CreateFileHandler and Tcl_DeleteFileHandler now take Unix - fd's and are only supported on the Unix platform - - 9. The C APIs for creating channel drivers have changed as part of - the new notifier implementation. The Tcl_File interfaces have been - removed. Tcl_GetChannelFile has been replaced with - Tcl_GetChannelHandle. Tcl_MakeFileChannel now takes a platform- - specific file handle. Tcl_DriverGetOptionProc procedures now take - an additional interp argument. - -5. Tcl newsgroup +4. Tcl newsgroup ----------------- There is a network news group "comp.lang.tcl" intended for the exchange @@ -264,7 +136,7 @@ general interest. A bad e-mail return address may prevent you from getting answers to your questions. You may have to reconfigure your news reading software to ensure that it is supplying valid e-mail addresses. -6. Tcl contributed archive +5. Tcl contributed archive -------------------------- Many people have created exciting packages and applications based on Tcl @@ -275,7 +147,7 @@ in the directory "/pub/tcl". The archive also contains several FAQ ("frequently asked questions") documents that provide solutions to problems that are commonly encountered by TCL newcomers. -7. Mailing lists +6. Mailing lists ---------------- A couple of Mailing List have been set up to discuss Macintosh or @@ -283,11 +155,11 @@ Windows related Tcl issues. In order to use these Mailing Lists you must have access to the internet. 
If you have access to the WWW the home pages for these mailing lists are located at the following URLs: - http://www.sunlabs.com/research/tcl/lists/mactcl-list.html + http://www.sunlabs.com/people/raymond.johnson/mactcl-list.html -and- - http://www.sunlabs.com/research/tcl/lists/wintcl-list.html + http://sunscript.sun.com/win/wintcl-list.html The home pages contain information about the lists and an HTML archive of all the past messages on the list. To subscribe send a message to: @@ -307,7 +179,7 @@ subscribing put the line: in the body instead (or wintcl). -8. Support and bug fixes +7. Support and bug fixes ------------------------ We're very interested in receiving bug reports and suggestions for @@ -341,7 +213,7 @@ In addition, Tcl support and training are available commercially from NeoSoft (info@neosoft.com), Computerized Processes Unlimited (gwl@cpu.com), and Data Kinetics (education@dkl.com). -9. Tcl version numbers +8. Tcl version numbers ---------------------- Each Tcl release is identified by two numbers separated by a dot, e.g. diff --git a/changes b/changes index b8672ef..f4acd31 100644 --- a/changes +++ b/changes @@ -1,6 +1,6 @@ Recent user-visible changes to Tcl: -SCCS: @(#) changes 1.338 97/11/25 08:30:52 +SCCS: @(#) changes 1.365 98/02/23 17:21:54 1. No more [command1] [command2] construct for grouping multiple commands on a single command line. @@ -3451,3 +3451,305 @@ Universal Headers V.3.0, so that Tcl will compile with CW Pro 2. -gmt flag set. Thanks to Jan Nijtmans for reporting the problem. (RJ) ----------------- Released 8.0p2, 11/25/97 ----------------------- + +12/3/97 (bug fix/optimization) Removed unneeded and potentially dangerous +instances of double evaluations in "if" and "expr" statements from +the library files. It is recommended that unless you need a double +evaluation you always use "expr {...}" instead of "expr ..." and +"if {...} ..." instead of "if ... ...". It will also be faster +thanks to the byte compiler.
(DL) + +---- Shipped as part of the plugin2.0b5 as 8.0p2Plugin1, Dec 8th 97 ---- + +12/8/97 (bug fix) Need to protect the newly accepted channel in an +accept callback on a socket, otherwise the callback may close it and +cause an error, which would cause the C code to attempt to close the +now deleted channel. Bumping the refcount assures that the channel sticks +around to be really closed in this case. (JL) + +12/8/97 (bug fix) Need to protect the channel in a fileevent so that it +is not deleted before the fileevent handler returns. (CS, JL) + +---- Shipped as part of the plugin as 8.0p2Plugin2, Jan 15th 98 ---- + +6/18/97 (new feature) Tcl now supports international character sets: + - All C APIs now accept UTF-8 strings instead of iso8859-1 strings, + wherever you see "char *", unless explicitly noted otherwise. + - All Tcl strings represented in UTF-8, which is a convenient + multi-byte encoding of Unicode. Variable names, procedure names, + and all other values in Tcl may include arbitrary Unicode characters. + For example, the Tcl command "string length" returns how many + Unicode characters are in the argument string. + - For Java compatibility, embedded null bytes in C strings are + represented as \xC080 in UTF-8 strings, but the null byte at the end + of a UTF-8 string remains \0. Thus Tcl strings once again do not + contain null bytes, except for termination bytes. + - For Java compatibility, "\uXXXX" is used in Tcl to enter a Unicode + character. "\u0000" through "\uffff" are acceptable Unicode + characters. + - "\xXX" is used to enter a small Unicode character (between 0 and 255) + in Tcl. + - Tcl automatically translates between UTF-8 and the normal encoding for + the platform during interactions with the system. + - The fconfigure command now supports a -encoding option for specifying + the encoding of an open file or socket. Tcl will automatically + translate between the specified encoding and UTF-8 during I/O. 
+ See the directory library/encoding to find out what encodings are + supported (eventually there will be an "encoding" command that + makes this information more accessible). + - There are several new C APIs that support UTF-8 and various encodings. + See Utf.3 for procedures that translate between Unicode and UTF-8 + and manipulate UTF-8 strings. See Encoding.3 for procedures that + create new encodings and translate between encodings. See + ToUpper.3 for procedures that perform case conversions on UTF-8 + strings. + +9/18/97 (enhancement) Literal objects are now shared by the ByteCode +structures created when compiling different scripts. This saves up to 45% +of the total memory needed for all literals. (BL) + +9/24/97 (bug fixes) Fixed Tcl_ParseCommand parsing of backslash-newline +sequences at start of command words. Suppressed Tcl_EvalDirect error logging +if non-TCL_OK result wasn't an error. (BL) + +10/17/97 (feature enhancement) "~username" now refers to the user's home +directory on Windows (previously always returned failure). (CCS) + +10/20/97 (implementation change) The Tcl parser has been completely rewritten +to make it more modular. It can now be used to parse a script without actually +executing it. The APIs for the new parser are not currently exported, but +they will eventually be exported and augmented with Tcl commands so that +Tcl scripts can parse other Tcl scripts. (JO) + +10/21/97 (API change) Added "flags" argument to Tcl_EvalObj, removed +Tcl_GlobalEvalObj procedure. Added new procedures Tcl_Eval2 and +Tcl_EvalObjv. (JO) +*** POTENTIAL INCOMPATIBILITY *** + +10/22/97 (API change) Renamed Tcl_ObjSetVar2 and Tcl_ObjGetVar2 to +Tcl_SetObjVar2 and Tcl_GetObjVar2 (for consistency with other C APIs) +and changed the name arguments to be strings instead of objects. (JO) +*** POTENTIAL INCOMPATIBILITY *** + +10/27/97 (enhancement) Bytecode compiler rewritten to use the new Tcl +parser.
(BL) + +11/3/97 (New routines) Added Tcl_AppendObjToObj, which appends the +string rep of one Tcl_Obj to another. Added Tcl_GetIndexFromObjStruct, +which is similar to Tcl_GetIndexFromObj, except that you can give an +offset between strings. This allows Tcl_GetIndexFromObjStruct to be +called with a table of records which have strings in them. (SRP) + +12/4/97 (enhancement) New Tcl expression parser added. Added new procedure +Tcl_ParseExpr and new token types TCL_TOKEN_SUB_EXPR and +TCL_TOKEN_OPERATOR. Expression compiler is reimplemented to use this +parser. (BL) + +12/9/97 (bug fix) Tcl_EvalObj() increments/decrements the refcount of the +script object to prevent the object from deleting itself while in the +middle of being evaluated. (CCS) + +12/9/97 (bug fix) Memory leak in Tcl_GetsObjCmd(). (CCS) + +12/11/97 (bug fix) Environment array leaked memory when compiled with +Visual C++. (SS) + +12/11/97 (bug fix) File events and non-blocking I/O did not work on +pipes under Windows. Changed to use threads to achieve non-blocking +behavior. (SS) + +12/18/97 (bug fixes) Fixed segfault in "namespace import"; importing a +procedure that causes a cycle now returns an error. Modified "info procs", +"info args", "info body", and "info default" to return information about +imported procedures as well as procedures defined in a namespace. (BL) + +12/19/97 (enhancement) Added new Tcl_GetString() procedure that can be used +in place of Tcl_GetStringFromObj() if the string representation's length +isn't needed. (BL) + +12/18/97 (bug fix) In the opt argument parsing package: if the description +had only flags, the "too many arguments" case was not detected. The default +value was not used for the special "args" ending argument. (DL) + +1/7/98 (clean up) Moved everything not absolutely necessary out of init.tcl; +procs now in auto.tcl and package.tcl can be autoloaded if needed.
(DL) + +1/7/98 (enhancement) tcltest made at install time will search for its +init.tcl where it is, even when using virtual path compilation. (DL) + +1/8/98 (os bug workaround) when needed, using a replacement for memcmp so +string compare "char with high bit set" "char w/o high bit set" returns +the expected value on all platforms. (DL) + +1/8/98 (unix portability/configure) building from .../unix/targetName/ +subdirectories and simply using "../configure" should now work fine. (DL) + +1/14/98 (enhancement) Added new regular expression package that +supports AREs, EREs, and BREs. The new package includes new escape +characters, meta-syntax, and character classes inside brackets. +Regexps involving backslashes may behave differently. (MH) +*** POTENTIAL INCOMPATIBILITY *** + +1/16/98 (os workaround) Under windows, "file volume" was causing chatter +and/or several seconds of hanging when querying empty floppy drives. +Changed implementation to call an empirically-derived function that doesn't +cause this. (CCS) + +1/16/98 (enhancement) Converted regular expressions to a Tcl_Obj type so +their compiled form gets cached automatically. Reduced NSUBEXP from 100 +to 20. (BW) + +1/16/98 (documentation) Change unclear documentation and comments for +functions like Tcl_TranslateFileName() and Tcl_ExternalToUtfDString(). Now +it explicitly says they take an uninitialized or free DString. A DString +that is "empty" or "not holding anything" could have been interpreted as one +currently with a zero length, but with a large dynamically allocated buffer. +(CCS) + +----------------- Released 8.1a1, 1/22/98 ----------------------- + +1/28/98 (new feature) Added a "-direct" optional flag to pkg_mkIndex +to generate direct loading package indexes (such as those you need +if you use namespaces and plan on using namespace import just after +package require).
pkg_mkIndex still has limitations regarding +package dependencies but errors are now ignored and with -direct, correct +package indexes can be generated even if there are dependencies as long +as the "package provide" are done early enough in the files. (DL) + +1/28/98 (enhancement) Performance tuning of regexp and regsub. (CCS) + +1/28/98 (bug fix) regexp and regsub with "-indices" returned the byte-offsets +of the characters in the UTF-8 representation, not the character offsets +themselves. (CCS) + +1/28/98 (bug fix) "clock format 0 -format %Z -gmt 1" would return the local +timezone string instead of "GMT" on Solaris and Windows. + +1/28/98 (bug fix) Restore tty settings when closing serial device on Unix. +This is good behavior when closing real serial devices, essential when +closing the pseudo-device /dev/tty because the user's terminal settings +would be left useless, in raw mode, when tcl quit. (CCS) + +1/28/98 (bug fix) Tcl_OpenCommandChannel() was modifying the contents of the +argv array passed to it, causing problems for any caller that wanted to +continue to use the argv array after calling Tcl_OpenCommandChannel(). (CCS) + +2/1/98 (bug fix) More bugs with %Z in format string argument to strftime(): +1. Borland always returned empty string. +2. MSVC always returned the timezone string for the current time, not the + timezone string for the specified time. +3. With MSVC, "clock format 0 -format %Z -gmt 1" would return "GMT" the first + time it was called, but would return the current timezone string on all + subsequent calls. (CCS) + +2/1/98 (bug fix) "file stat" was broken on Windows. +1. "file stat" of a root directory (local or network) or a relative path that + resolved to a root directory (c:. when pwd was c:/) was returning error. +2. "file stat" on a regular file (S_IFREG), the st_mode was sign extended to + a negative int if the platform-dependent type "mode_t" was declared as a + short instead of an unsigned short. +3.
"file stat" of a network directory, the st_dev was incorrectly reported + as the id of the last accessed local drive rather than the id of the + network drive. (CCS) + +2/1/98 (bug fix) "file attributes" of a relative path that resolved to a +root directory was returning error. (CCS) + +2/1/98 (bug fix) Change error message when "file attribute" could not +determine the attributes for a file. Previously it would return different +error messages on Unix vs. Windows vs. Mac. (CCS) + +2/4/98 (bug fixes) Fixed several instances of bugs where the parser/compiler +would reach outside the range of allocated memory. Improved the array +lookup algorithm in set compilation. (DL) + +2/5/98 (change) The TCL_PARSE_PART1 flag for Set/Get(Obj)Var2 C APIs is now +deprecated and ignored. The part1 is always parsed when the part2 argument +is NULL. This is to avoid a pattern of errors for extension writers converting +from string based Tcl_SetVar() to new Tcl_SetObjVar2() and who could easily +forget to provide the flag and thus get code working for normal variables +but not for array elements. The performance hit is minimal. A side effect +of that change is that it is no longer possible to create scalar variables +that can't be accessed by tcl scripts because of their invalid name +(ending with parenthesis). Likewise it is also parsed and checked to +ensure that you don't create array elements of an array whose name is a valid +array element because they would not be accessible from scripts anyway. +Note: There is still duplicate array elements parsing code. (DL) +*** POTENTIAL INCOMPATIBILITY *** + +2/11/98 (bug fix) Sharing objects between interps, such as by "interp +eval" or "send" could cause a crash later when dereferencing an interp +that had been deleted, given code such as: + set a {set x y} + interp create foo + interp eval foo $a + interp delete foo + unset a +Interp "foo" was gone, but "a" had an internal rep consisting of bytecodes +containing a dangling pointer to "foo".
Unsetting "a" would attempt to +return resources back to "foo", causing a crash as random memory was +accessed. The lesson is that if an object's internal rep depends on +an interp (or any other data structure) it must preserve that data in +some fashion. (CCS) + +2/11/98 (enhancement) The "interp" command was returning inconsistent error +messages when the specified slave interp could not be found. (CCS) + +2/11/98 (bug fix) Result codes like TCL_BREAK and TCL_CONTINUE were not +propagating through the master/slave interp boundaries, such as "interp +eval" and "interp alias". TCL_OK, TCL_ERROR, and non-standard codes like +the integer 57 work. There is still a question as to whether TCL_RETURN +can/should propagate. (CCS) + +2/11/98 (bug fix) TclCompileScript() was dereferencing memory 1 byte before +start of the string to compile, looking for ']'. (CCS,DL) + +2/11/98 (bug fix) Tcl_Eval2() was dereferencing memory 1 byte before start +of the string to eval, looking for ']'. (CCS,DL) + +2/11/98 (bug fix) Compiling "set a(b" was running off end of string. (CCS,DL) + +2/11/98 (bug fix) Windows initialization code was dereferencing +uninitialized memory if TCL_LIBRARY environment didn't exist. (CCS) + +2/11/98 (bug fix) Windows "registry" command was dereferencing +uninitialized memory when constructing the $errorCode for a failed +registry call. (CCS) + +2/11/98 (enhancement) Eliminate the TCL_USE_TIMEZONE_VAR definition from +configure.in, because it was the same information as the already existing +HAVE_TM_ZONE definition. The lack of HAVE_TM_ZONE is used to work around a +Solaris and Windows bug where "clock format [clock sec] -format %Z -gmt 1" +produces the local timezone string instead of "GMT". (CCS) + +2/11/98 (bug fix) Memleaks and dereferencing of uninitialized memory in +regexp if an error occurred while compiling a regular expression. (CCS). + +2/18/98 (new feature) Added mutexes and thread local storage in order +to make Tcl thread safe.
For testing purposes, there is a testthread +command that creates a new thread and an interpreter inside it. See +thread.test for examples, but this script-level interface is not fixed. +Each thread has its own notifier instance to manage its own events, +and threads can post messages to each other's message queue. +This uses pthreads on UNIX, and native thread support on other platforms. +You enable this by configuring with --enable-threads. Note that at +this time *Tk* is still not thread safe. Special thanks to +Richard Hipp: his earlier implementation inspired this work. (BW, SS, JI) + +2/18/98 (hidden feature change) The way the env() array is shared among +interpreters changed. Updates to env used to trigger write traces in +other interpreters. This undocumented feature is no longer implemented. +Instead, variable tracing is used to keep the C-level environ array in sync +with the Tcl-level env array. This required adding TCL_TRACE_ARRAY support +to Tcl_TraceVar2 so that array names works properly. (BW) +*** POTENTIAL INCOMPATIBILITY *** + +2/18/98 (enhancement) Conditional compilation for unix systems (e.g., +IRIX, SCO) that use f_bsize instead of st_blksize to determine disk block +size. (CCS) + +2/23/98 (bug fix) Fixed the emulation of polling selects in the threaded +version of the Unix notifier. The bug was showing up on a multiprocessor +as starvation of the notifier thread. (BW) + +----------------- Released 8.1a2, Feb 23 1998 ----------------------- diff --git a/compat/memcmp.c b/compat/memcmp.c new file mode 100644 index 0000000..09a5724 --- /dev/null +++ b/compat/memcmp.c @@ -0,0 +1,61 @@ +/* + * memcmp.c -- + * + * Source code for the "memcmp" library routine. + * + * Copyright (c) 1998 Sun Microsystems, Inc. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
+ * + * SCCS: @(#) memcmp.c 1.2 98/01/19 10:48:58 + */ + +#include "tcl.h" +#include "tclPort.h" + +/* + * Here is the prototype just in case it is not included + * in tclPort.h. + */ + +int memcmp _ANSI_ARGS_((CONST VOID *s1, + CONST VOID *s2, size_t n)); + +/* + *---------------------------------------------------------------------- + * + * memcmp -- + * + * Compares two bytes sequences. + * + * Results: + * compares its arguments, looking at the first n + * bytes (each interpreted as an unsigned char), and returns + * an integer less than, equal to, or greater than 0, accord- + * ing as s1 is less than, equal to, or + * greater than s2 when taken to be unsigned 8 bit numbers. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +memcmp(s1, s2, n) + CONST VOID *s1; /* First string. */ + CONST VOID *s2; /* Second string. */ + size_t n; /* Length to compare. */ +{ + unsigned char u1, u2; + + for ( ; n-- ; s1++, s2++) { + u1 = * (unsigned char *) s1; + u2 = * (unsigned char *) s2; + if ( u1 != u2) { + return (u1-u2); + } + } + return 0; +} diff --git a/compat/stdlib.h b/compat/stdlib.h index 059ea29..5ffda0e 100644 --- a/compat/stdlib.h +++ b/compat/stdlib.h @@ -9,12 +9,12 @@ * declare all the procedures needed here (such as strtod). * * Copyright (c) 1991 The Regents of the University of California. - * Copyright (c) 1994 Sun Microsystems, Inc. + * Copyright (c) 1994-1998 Sun Microsystems, Inc. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * SCCS: @(#) stdlib.h 1.10 96/02/15 14:43:54 + * SCCS: @(#) stdlib.h 1.12 98/01/21 21:04:59 */ #ifndef _STDLIB diff --git a/compat/strftime.c b/compat/strftime.c index 7c72557..29702ed 100644 --- a/compat/strftime.c +++ b/compat/strftime.c @@ -8,7 +8,7 @@ * source. See the copyright notice below for details on redistribution * restrictions. 
The "license.terms" file does not apply to this file. * - * SCCS: @(#) strftime.c 1.4 97/08/07 17:17:02 + * SCCS: @(#) strftime.c 1.5 98/02/02 20:59:19 */ /* @@ -46,7 +46,7 @@ #if defined(LIBC_SCCS) && !defined(lint) /*static char *sccsid = "from: @(#)strftime.c 5.11 (Berkeley) 2/24/91";*/ -static char *rcsid = "$Id: strftime.c,v 1.1 1998/03/26 14:46:31 rjohnson Exp $"; +static char *rcsid = "$Id: strftime.c,v 1.1.2.1 1998/09/21 23:39:53 stanton Exp $"; #endif /* LIBC_SCCS and not lint */ #include @@ -316,7 +316,7 @@ _fmt(format, t) continue; #ifndef MAC_TCL case 'Z': { - char *name = TclpGetTZName(); + char *name = TclpGetTZName(t->tm_isdst); if (name && !_add(name)) { return 0; } diff --git a/compat/string.h b/compat/string.h index 541e159..bb4c6e7 100644 --- a/compat/string.h +++ b/compat/string.h @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * SCCS: @(#) string.h 1.13 96/04/09 22:14:53 + * SCCS: @(#) string.h 1.14 98/01/08 14:24:44 */ #ifndef _STRING @@ -32,8 +32,12 @@ extern char * memchr _ANSI_ARGS_((CONST VOID *s, int c, size_t n)); extern int memcmp _ANSI_ARGS_((CONST VOID *s1, CONST VOID *s2, size_t n)); extern char * memcpy _ANSI_ARGS_((VOID *t, CONST VOID *f, size_t n)); +#ifdef NO_MEMMOVE +#define memmove(d, s, n) bcopy ((s), (d), (n)) +#else extern char * memmove _ANSI_ARGS_((VOID *t, CONST VOID *f, size_t n)); +#endif extern char * memset _ANSI_ARGS_((VOID *s, int c, size_t n)); extern int strcasecmp _ANSI_ARGS_((CONST char *s1, diff --git a/doc/Backslash.3 b/doc/Backslash.3 index e7ac1f7..8823e41 100644 --- a/doc/Backslash.3 +++ b/doc/Backslash.3 @@ -5,10 +5,10 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
'\" -'\" SCCS: @(#) Backslash.3 1.16 96/03/25 19:57:09 +'\" SCCS: @(#) Backslash.3 1.17 97/10/15 10:16:29 '\" .so man.macros -.TH Tcl_Backslash 3 "" Tcl "Tcl Library Procedures" +.TH Tcl_Backslash 3 "8.1" Tcl "Tcl Library Procedures" .BS .SH NAME Tcl_Backslash \- parse a backslash sequence @@ -30,16 +30,24 @@ the backslash character. .SH DESCRIPTION .PP -This is a utility procedure used by several of the Tcl -commands. It parses a backslash sequence and returns -the single character corresponding to the sequence. -\fBTcl_Backslash\fR modifies \fI*countPtr\fR to contain the number -of characters in the backslash sequence. +.VS 8.1 +The use of \fBTcl_Backslash\fR is deprecated in favor of +\fBTcl_UtfBackslash\fR. .PP -See the Tcl manual entry for information on the valid -backslash sequences. -All of the sequences described in the Tcl -manual entry are supported by \fBTcl_Backslash\fR. +This is a utility procedure provided for backwards compatibility with +non-internationalized Tcl extensions. It parses a backslash sequence and +returns the low byte of the Unicode character corresponding to the sequence. +.VE +\fBTcl_Backslash\fR modifies \fI*countPtr\fR to contain the number of +characters in the backslash sequence. +.PP +See the Tcl manual entry for information on the valid backslash sequences. +All of the sequences described in the Tcl manual entry are supported by +\fBTcl_Backslash\fR. +.VS 8.1 br +.SH "SEE ALSO" +Tcl(n), Tcl_UtfBackslash(3) +.VE .SH KEYWORDS backslash, parse diff --git a/doc/ByteArrObj.3 b/doc/ByteArrObj.3 new file mode 100644 index 0000000..ccc304d --- /dev/null +++ b/doc/ByteArrObj.3 @@ -0,0 +1,91 @@ +'\" +'\" Copyright (c) 1997 Sun Microsystems, Inc. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+'\" +'\" SCCS: @(#) ByteArrObj.3 1.2 97/10/20 15:29:23 +'\" +.so man.macros +.TH Tcl_ByteArrayObj 3 8.1 Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_NewByteArrayObj, Tcl_SetByteArrayObj, Tcl_GetByteArrayFromObj, Tcl_SetByteArrayLength \- manipulate Tcl objects as arrays of bytes +.SH SYNOPSIS +.nf +\fB#include <tcl.h>\fR +.sp +Tcl_Obj * +\fBTcl_NewByteArrayObj\fR(\fIbytes, length\fR) +.sp +void +\fBTcl_SetByteArrayObj\fR(\fIobjPtr, bytes, length\fR) +.sp +unsigned char * +\fBTcl_GetByteArrayFromObj\fR(\fIobjPtr, lengthPtr\fR) +.sp +unsigned char * +\fBTcl_SetByteArrayLength\fR(\fIobjPtr, length\fR) +.SH ARGUMENTS +.AS "unsigned char" *lengthPtr in/out +.AP "unsigned char" *bytes in +The array of bytes used to initialize or set a byte-array object. +.AP int length in +The length of the array of bytes. It must be >= 0. +.AP Tcl_Obj *objPtr in/out +For \fBTcl_SetByteArrayObj\fR, this points to the object to be converted to +byte-array type. For \fBTcl_GetByteArrayFromObj\fR and +\fBTcl_SetByteArrayLength\fR, this points to the object from which to get +the byte-array value; if \fIobjPtr\fR does not already point to a byte-array +object, it will be converted to one. +.AP int *lengthPtr out +If non-NULL, filled with the length of the array of bytes in the object. +.BE + +.SH DESCRIPTION +.PP +These procedures are used to create, modify, and read Tcl byte-array objects +from C code. Byte-array objects are typically used to hold the +results of binary IO operations or data structures created with the +\fBbinary\fR command. In Tcl, an array of bytes is not equivalent to a +string. Conceptually, a string is an array of Unicode characters, while a +byte-array is an array of 8-bit quantities with no implicit meaning. +Accessor functions are provided to get the string representation of a +byte-array or to convert an arbitrary object to a byte-array.
Obtaining the +string representation of a byte-array object (by calling +\fBTcl_GetStringFromObj\fR) produces a properly formed UTF-8 sequence with a +one-to-one mapping between the bytes in the internal representation and the +UTF-8 characters in the string representation. +.PP +\fBTcl_NewByteArrayObj\fR and \fBTcl_SetByteArrayObj\fR will +create a new object of byte-array type or modify an existing object to have a +byte-array type. Both of these procedures set the object's type to be +byte-array and set the object's internal representation to a copy of the +array of bytes given by \fIbytes\fR. \fBTcl_NewByteArrayObj\fR returns a +pointer to a newly allocated object with a reference count of zero. +\fBTcl_SetByteArrayObj\fR invalidates any old string representation and, if +the object is not already a byte-array object, frees any old internal +representation. +.PP +\fBTcl_GetByteArrayFromObj\fR converts a Tcl object to byte-array type and +returns a pointer to the object's new internal representation as an array of +bytes. The length of this array is stored in \fIlengthPtr\fR if +\fIlengthPtr\fR is non-NULL. The storage for the array of bytes is owned by +the object and should not be freed. The contents of the array may be +modified by the caller only if the object is not shared and the caller +invalidates the string representation. +.PP +\fBTcl_SetByteArrayLength\fR converts the Tcl object to byte-array type +and changes the length of the object's internal representation as an +array of bytes. If \fIlength\fR is greater than the space currently +allocated for the array, the array is reallocated to the new length; the +newly allocated bytes at the end of the array have arbitrary values. If +\fIlength\fR is less than the space currently allocated for the array, +the length of array is reduced to the new length. The return value is a +pointer to the object's new array of bytes. 
+ +.SH "SEE ALSO" +Tcl_GetStringFromObj, Tcl_NewObj, Tcl_IncrRefCount, Tcl_DecrRefCount + +.SH KEYWORDS +object, byte array, utf, unicode, internationalization diff --git a/doc/CrtChannel.3 b/doc/CrtChannel.3 index 354665a..dbec17b 100644 --- a/doc/CrtChannel.3 +++ b/doc/CrtChannel.3 @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) CrtChannel.3 1.29 97/06/20 13:37:45 +'\" SCCS: @(#) CrtChannel.3 1.30 97/12/11 13:10:02 .so man.macros .TH Tcl_CreateChannel 3 8.0 Tcl "Tcl Library Procedures" .BS @@ -221,16 +221,19 @@ typedef struct Tcl_ChannelType { Tcl_DriverGetOptionProc *\fIgetOptionProc\fR; Tcl_DriverWatchProc *\fIwatchProc\fR; Tcl_DriverGetHandleProc *\fIgetHandleProc\fR; + Tcl_DriverClose2Proc *\fIclose2Proc\fR; } Tcl_ChannelType; .CE .VE .PP The driver must provide implementations for all functions except -\fIblockModeProc\fR, \fIseekProc\fR, \fIsetOptionProc\fR, and -\fIgetOptionProc\fR, which may be specified as NULL to indicate that the -channel does not support seeking. Other functions that can not be -implemented for this type of device should return \fBEINVAL\fR when invoked -to indicate that they are not implemented. +\fIblockModeProc\fR, \fIseekProc\fR, \fIsetOptionProc\fR, +.VS +\fIgetOptionProc\fR, and \fIclose2Proc\fR, which may be specified as +.VE +NULL. Other functions that can not be implemented for this type of +device should return \fBEINVAL\fR when invoked to indicate that they +are not implemented. .SH TYPENAME .PP @@ -264,7 +267,7 @@ For some device types, the blocking and nonblocking behavior can be implemented by the underlying operating system; for other device types, the behavior must be emulated in the channel driver. 
-.SH CLOSEPROC +.SH CLOSEPROC AND CLOSE2PROC .PP The \fIcloseProc\fR field contains the address of a function called by the generic layer to clean up driver-related information when the channel is @@ -286,6 +289,35 @@ calling the \fIcloseProc\fR. If the close operation is successful, the procedure should return zero; otherwise it should return a nonzero POSIX error code. In addition, if an error occurs and \fIinterp\fR is not NULL, the procedure should store an error message in \fIinterp->result\fR. +.PP +.VS +Alternatively, channels that support closing the read and write sides +independently may set \fIcloseProc\fR to \fBTCL_CLOSE2PROC\fR and set +\fIclose2Proc\fR to the address of a function that matches the +following prototype: +.PP +.CS +typedef int Tcl_DriverClose2Proc( + ClientData \fIinstanceData\fR, + Tcl_Interp *\fIinterp\fR, + int \fIflags\fR); +.CE +.PP +The \fIclose2Proc\fR will be called with \fIflags\fR set to an OR'ed +combination of \fBTCL_CLOSE_READ\fR or \fBTCL_CLOSE_WRITE\fR to +indicate that the driver should close the read and/or write side of +the channel. The channel driver may be invoked to perform +additional operations on the channel after \fIclose2Proc\fR is +called to close one or both sides of the channel. If \fIflags\fR is +\fB0\fR (zero), the driver should close the channel in the manner +described above for \fIcloseProc\fR. No further operations will be +invoked on this instance after \fIclose2Proc\fR is called with all +flags cleared. In all cases, the \fIclose2Proc\fR function should +return zero if the close operation was successful; otherwise it should +return a nonzero POSIX error code. In addition, if an error occurs and +\fIinterp\fR is not NULL, the procedure should store an error message +in \fIinterp->result\fR. 
+.VE .SH INPUTPROC .PP diff --git a/doc/CrtObjCmd.3 b/doc/CrtObjCmd.3 index 78fe6f8..0fe9611 100644 --- a/doc/CrtObjCmd.3 +++ b/doc/CrtObjCmd.3 @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) @(#) CrtObjCmd.3 1.10 97/07/31 14:10:38 +'\" SCCS: @(#) @(#) CrtObjCmd.3 1.11 97/11/10 11:19:57 '\" .so man.macros .TH Tcl_CreateObjCommand 3 8.0 Tcl "Tcl Library Procedures" @@ -62,8 +62,10 @@ such that whenever \fIname\fR is invoked as a Tcl command (e.g., via a call to \fBTcl_EvalObj\fR) the Tcl interpreter will call \fIproc\fR to process the command. .PP -\fBTcl_CreateObjCommand\fR will delete any command \fIname\fR -already associated with the interpreter. +\fBTcl_CreateObjCommand\fR deletes any existing command +\fIname\fR already associated with the interpreter +(however see below for an exception where the existing command +is not deleted). It returns a token that may be used to refer to the command in subsequent calls to \fBTcl_GetCommandName\fR. If \fIname\fR contains any \fB::\fR namespace qualifiers, @@ -128,6 +130,17 @@ not modify them. Call \fBTcl_SetObjResult\fR if you want to return something from the \fIobjv\fR array. .PP +Ordinarily, \fBTcl_CreateObjCommand\fR deletes any existing command +\fIname\fR already associated with the interpreter. +However, if the existing command was created by a previous call to +\fBTcl_CreateCommand\fR, +\fBTcl_CreateObjCommand\fR does not delete the command +but instead arranges for the Tcl interpreter to call the +\fBTcl_ObjCmdProc\fR \fIproc\fR in the future. +The old string-based \fBTcl_CmdProc\fR associated with the command +is retained and its address can be obtained by subsequent +\fBTcl_GetCommandInfo\fR calls. This is done for backwards compatibility. +.PP \fIDeleteProc\fR will be invoked when (if) \fIname\fR is deleted. 
This can occur through a call to \fBTcl_DeleteCommand\fR, \fBTcl_DeleteCommandFromToken\fR, or \fBTcl_DeleteInterp\fR, diff --git a/doc/Encoding.3 b/doc/Encoding.3 new file mode 100644 index 0000000..e1d08d3 --- /dev/null +++ b/doc/Encoding.3 @@ -0,0 +1,464 @@ +'\" +'\" Copyright (c) 1997-1998 Sun Microsystems, Inc. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. +'\" +'\" SCCS: @(#) Encoding.3 1.8 98/02/18 16:11:20 +'\" +.so man.macros +.TH Tcl_GetEncoding 3 "8.1" Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNames, Tcl_CreateEncoding \- procedures for creating and using encodings. +.SH SYNOPSIS +.nf +\fB#include <tcl.h>\fR +.sp +Tcl_Encoding +\fBTcl_GetEncoding\fR(\fIinterp, name\fR) +.sp +void +\fBTcl_FreeEncoding\fR(\fIencoding\fR) +.sp +void +\fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR) +.sp +int +\fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, + dstCharsPtr\fR) +.sp +void +\fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR) +.sp +int +\fBTcl_UtfToExternal\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, + dstCharsPtr\fR) +.sp +char * +\fBTcl_GetEncodingName\fR(\fIencoding\fR) +.sp +int +\fBTcl_SetSystemEncoding\fR(\fIinterp, name\fR) +.sp +void +\fBTcl_GetEncodingNames\fR(\fIinterp\fR) +.sp +Tcl_Encoding +\fBTcl_CreateEncoding\fR(\fItypePtr\fR) +.SH ARGUMENTS +.AS Tcl_EncodingState *dstWrotePtr +.AP Tcl_Interp *interp in +Interpreter to use for error reporting, or NULL if no error reporting is +desired. +.AP "CONST char" *name in +Name of encoding to load. +.AP Tcl_Encoding encoding in +The encoding to query, free, or use for converting text. 
If \fIencoding\fR is +NULL, the current system encoding is used. +.AP "CONST char" *src in +For the \fBTcl_ExternalToUtf\fR functions, an array of bytes in the +specified encoding that are to be converted to UTF-8. For the +\fBTcl_UtfToExternal\fR functions, an array of UTF-8 characters to be +converted to the specified encoding. +.AP int srcLen in +Length of \fIsrc\fR in bytes. If the length is negative, the +encoding-specific length of the string is used. +.AP Tcl_DString *dstPtr out +Pointer to an uninitialized or free \fBTcl_DString\fR in which the converted +result will be stored. +.AP int flags in +Various flag bits OR-ed together. +TCL_ENCODING_START signifies that the +source buffer is the first block in a (potentially multi-block) input +stream, telling the conversion routine to reset to an initial state and +perform any initialization that needs to occur before the first byte is +converted. TCL_ENCODING_END signifies that the source buffer is the last +block in a (potentially multi-block) input stream, telling the conversion +routine to perform any finalization that needs to occur after the last +byte is converted and then to reset to an initial state. +TCL_ENCODING_STOPONERROR signifies that the conversion routine should +return immediately upon reading a source character that doesn't exist in +the target encoding; otherwise a default fallback character will +automatically be substituted. +.AP Tcl_EncodingState *statePtr in/out +Used when converting a (generally long or indefinite length) byte stream +in a piece by piece fashion. The conversion routine stores its current +state in \fI*statePtr\fR after \fIsrc\fR (the buffer containing the +current piece) has been converted; that state information must be passed +back when converting the next piece of the stream so the conversion +routine knows what state it was in when it left off at the end of the +last piece. 
May be NULL, in which case the value specified for \fIflags\fR +is ignored and the source buffer is assumed to contain the complete string to +convert. +.AP char *dst out +Buffer in which the converted result will be stored. No more than +\fIdstLen\fR bytes will be stored in \fIdst\fR. +.AP int dstLen in +The maximum length of the output buffer \fIdst\fR in bytes. +.AP int *srcReadPtr out +Filled with the number of bytes from \fIsrc\fR that were actually +converted. This may be less than the original source length if there was +a problem converting some source characters. May be NULL. +.AP int *dstWrotePtr out +Filled with the number of bytes that were actually stored in the output +buffer as a result of the conversion. May be NULL. +.AP int *dstCharsPtr out +Filled with the number of characters that correspond to the number of bytes +stored in the output buffer. May be NULL. +.AP Tcl_EncodingType *typePtr in +Structure that defines a new type of encoding. +.BE +.SH INTRODUCTION +.PP +These routines convert between Tcl's internal character representation, +UTF-8, and character representations used by various operating systems or +file systems, such as Unicode, ASCII, or Shift-JIS. When operating on +strings, such as obtaining the names of files or displaying +characters using international fonts, the strings must be translated into +one or possibly multiple formats that the various system calls can use. For +instance, on a Japanese Unix workstation, a user might obtain a filename +represented in the EUC-JP file encoding and then translate the characters to +the jisx0208 font encoding in order to display the filename in a Tk widget. +The purpose of the encoding package is to help bridge the translation gap. +UTF-8 provides an intermediate staging ground for all the various +encodings. In the example above, text would be translated into UTF-8 from +whatever file encoding the operating system is using. 
Then it would be +translated from UTF-8 into whatever font encoding the display routines +require. +.PP +Some basic encodings are compiled into Tcl. Others can be defined by the +user or dynamically loaded from encoding files in a +platform-independent manner. +.SH DESCRIPTION +.PP +\fBTcl_GetEncoding\fR finds an encoding given its \fIname\fR. The name may +refer to a builtin Tcl encoding, a user-defined encoding registered by +calling \fBTcl_CreateEncoding\fR, or a dynamically-loadable encoding +file. The return value is a token that represents the encoding and can be +used in subsequent calls to procedures such as \fBTcl_GetEncodingName\fR, +\fBTcl_FreeEncoding\fR, and \fBTcl_UtfToExternal\fR. If the name did not +refer to any known or loadable encoding, NULL is returned and an error +message is returned in \fIinterp\fR. +.PP +The encoding package maintains a database of all encodings currently in use. +The first time \fIname\fR is seen, \fBTcl_GetEncoding\fR returns an +encoding with a reference count of 1. If the same \fIname\fR is requested +further times, then the reference count for that encoding is incremented +without the overhead of allocating a new encoding and all its associated +data structures. +.PP +When an \fIencoding\fR is no longer needed, \fBTcl_FreeEncoding\fR +should be called to release it. When an \fIencoding\fR is no longer in use +anywhere (i.e., it has been freed as many times as it has been gotten) +\fBTcl_FreeEncoding\fR will release all storage the encoding was using +and delete it from the database. +.PP +\fBTcl_ExternalToUtfDString\fR converts a source buffer \fIsrc\fR from the +specified \fIencoding\fR into UTF-8. The converted bytes are stored in +\fIdstPtr\fR, which is then NULL terminated. The caller should eventually +call \fBTcl_DStringFree\fR to free any information stored in \fIdstPtr\fR. 
+When converting, if any of the characters in the source buffer cannot be +represented in the target encoding, a default fallback character will be +used. +.PP +\fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified +\fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the +source buffer and up to \fIdstLen\fR converted bytes are stored in \fIdst\fR. +In all cases, \fI*srcReadPtr\fR is filled with the number of bytes that were +successfully converted from \fIsrc\fR and \fI*dstWrotePtr\fR is filled with +the corresponding number of bytes that were stored in \fIdst\fR. The return +value is one of the following: +.RS +.IP \fBTCL_OK\fR 29 +All bytes of \fIsrc\fR were converted. +.IP \fBTCL_CONVERT_NOSPACE\fR 29 +The destination buffer was not large enough for all of the converted data; as +many characters as could fit were converted though. +.IP \fBTCL_CONVERT_MULTIBYTE\fR 29 +The last few bytes in the source buffer were the beginning of a multibyte +sequence, but more bytes were needed to complete this sequence. A +subsequent call to the conversion routine should pass a buffer containing +the unconverted bytes that remained in \fIsrc\fR plus some further bytes +from the source stream to properly convert the formerly split-up multibyte +sequence. +.IP \fBTCL_CONVERT_SYNTAX\fR 29 +The source buffer contained an invalid character sequence. This may occur +if the input stream has been damaged or if the input encoding method was +misidentified. +.IP \fBTCL_CONVERT_UNKNOWN\fR 29 +The source buffer contained a character that could not be represented in +the target encoding and TCL_ENCODING_STOPONERROR was specified. +.RE +.LP +\fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 +into the specified \fIencoding\fR. The converted bytes are stored in +\fIdstPtr\fR, which is then terminated with the appropriate encoding-specific +NULL. 
The caller should eventually call \fBTcl_DStringFree\fR to free any +information stored in \fIdstPtr\fR. When converting, if any of the +characters in the source buffer cannot be represented in the target +encoding, a default fallback character will be used. +.PP +\fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into +the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from +the source buffer and up to \fIdstLen\fR converted bytes are stored in +\fIdst\fR. In all cases, \fI*srcReadPtr\fR is filled with the number of +bytes that were successfully converted from \fIsrc\fR and \fI*dstWrotePtr\fR +is filled with the corresponding number of bytes that were stored in +\fIdst\fR. The return values are the same as the return values for +\fBTcl_ExternalToUtf\fR. +.PP +\fBTcl_GetEncodingName\fR is roughly the inverse of \fBTcl_GetEncoding\fR. +Given an \fIencoding\fR, the return value is the \fIname\fR argument that +was used to create the encoding. The string returned by +\fBTcl_GetEncodingName\fR is only guaranteed to persist until the +\fIencoding\fR is deleted. The caller must not modify this string. +.PP +\fBTcl_SetSystemEncoding\fR sets the default encoding that should be used +whenever the user passes a NULL value for the \fIencoding\fR argument to +any of the other encoding functions. If \fIname\fR is NULL, the system +encoding is reset to the default system encoding, \fBbinary\fR. If the +name did not refer to any known or loadable encoding, TCL_ERROR is +returned and an error message is left in \fIinterp\fR. Otherwise, this +procedure increments the reference count of the new system encoding, +decrements the reference count of the old system encoding, and returns +TCL_OK. +.PP +\fBTcl_GetEncodingNames\fR sets the \fIinterp\fR result to a list +consisting of the names of all the encodings that are currently defined +or can be dynamically loaded, searching the encoding path specified by +\fBTcl_SetEncodingPath\fR. 
This procedure does not ensure that the +dynamically-loadable encoding files contain valid data, but merely that they +exist. +.PP +\fBTcl_CreateEncoding\fR defines a new encoding and registers the C +procedures that are called back to convert between the encoding and +UTF-8. Encodings created by \fBTcl_CreateEncoding\fR are thereafter +visible in the database used by \fBTcl_GetEncoding\fR. Just as with the +\fBTcl_GetEncoding\fR procedure, the return value is a token that +represents the encoding and can be used in subsequent calls to other +encoding functions. \fBTcl_CreateEncoding\fR returns an encoding with a +reference count of 1. If an encoding with the specified \fIname\fR +already exists, then its entry in the database is replaced with the new +encoding; the token for the old encoding will remain valid and continue +to behave as before, but users of the new token will now call the new +encoding procedures. +.PP +The \fItypePtr\fR argument to \fBTcl_CreateEncoding\fR contains information +about the name of the encoding and the procedures that will be called to +convert between this encoding and UTF-8. It is defined as follows: +.PP +.CS +typedef struct Tcl_EncodingType { + CONST char *\fIencodingName\fR; + Tcl_EncodingConvertProc *\fItoUtfProc\fR; + Tcl_EncodingConvertProc *\fIfromUtfProc\fR; + Tcl_EncodingFreeProc *\fIfreeProc\fR; + ClientData \fIclientData\fR; + int \fInullSize\fR; +} Tcl_EncodingType; +.CE +.PP +The \fIencodingName\fR provides a string name for the encoding, by +which it can be referred in other procedures such as +\fBTcl_GetEncoding\fR. The \fItoUtfProc\fR refers to a callback +procedure to invoke to convert text from this encoding into UTF-8. +The \fIfromUtfProc\fR refers to a callback procedure to invoke to +convert text from UTF-8 into this encoding. The \fIfreeProc\fR refers +to a callback procedure to invoke when this encoding is deleted. The +\fIfreeProc\fR field may be NULL. 
The \fIclientData\fR contains an +arbitrary one-word value passed to \fItoUtfProc\fR, \fIfromUtfProc\fR, +and \fIfreeProc\fR whenever they are called. Typically, this is a +pointer to a data structure containing encoding-specific information +that can be used by the callback procedures. For instance, two very +similar encodings such as \fBascii\fR and \fBmacRoman\fR may use the +same callback procedure, but use different values of \fIclientData\fR +to control its behavior. The \fInullSize\fR specifies the number of +zero bytes that signify end-of-string in this encoding. It must be +\fB1\fR (for single-byte or multi-byte encodings like ASCII or +Shift-JIS) or \fB2\fR (for double-byte encodings like Unicode). +Constant-sized encodings with 3 or more bytes per character (such as +CNS11643) are not accepted. +.PP +The callback procedures \fItoUtfProc\fR and \fIfromUtfProc\fR should match the +type \fBTcl_EncodingConvertProc\fR: +.PP +.CS +typedef int Tcl_EncodingConvertProc( + ClientData \fIclientData\fR, + CONST char *\fIsrc\fR, + int \fIsrcLen\fR, + int \fIflags\fR, + Tcl_EncodingState *\fIstatePtr\fR, + char *\fIdst\fR, + int \fIdstLen\fR, + int *\fIsrcReadPtr\fR, + int *\fIdstWrotePtr\fR, + int *\fIdstCharsPtr\fR); +.CE +.PP +The \fItoUtfProc\fR and \fIfromUtfProc\fR procedures are called by the +\fBTcl_ExternalToUtf\fR or \fBTcl_UtfToExternal\fR family of functions to +perform the actual conversion. The \fIclientData\fR parameter to these +procedures is the same as the \fIclientData\fR field specified to +\fBTcl_CreateEncoding\fR when the encoding was created. The remaining +arguments to the callback procedures are the same as the arguments, +documented at the top, to \fBTcl_ExternalToUtf\fR or +\fBTcl_UtfToExternal\fR, with the following exceptions. If the +\fIsrcLen\fR argument to one of those high-level functions is negative, +the value passed to the callback procedure will be the appropriate +encoding-specific string length of \fIsrc\fR. 
If any of the \fIsrcReadPtr\fR, +\fIdstWrotePtr\fR, or \fIdstCharsPtr\fR arguments to one of the high-level +functions is NULL, the corresponding value passed to the callback +procedure will be a non-NULL location. +.PP +The callback procedure \fIfreeProc\fR, if non-NULL, should match the type +\fBTcl_EncodingFreeProc\fR: +.CS +typedef void Tcl_EncodingFreeProc( + ClientData \fIclientData\fR); +.CE +.PP +This \fIfreeProc\fR function is called when the encoding is deleted. The +\fIclientData\fR parameter is the same as the \fIclientData\fR field +specified to \fBTcl_CreateEncoding\fR when the encoding was created. +.PP +.SH ENCODING FILES +Space would prohibit precompiling into Tcl every possible encoding +algorithm, so many encodings are stored on disk as dynamically-loadable +encoding files. This behavior also allows the user to create additional +encoding files that can be loaded using the same mechanism. These +encoding files contain information about the tables and/or escape +sequences used to map between an external encoding and Unicode. The +external encoding may consist of single-byte, multi-byte, or double-byte +characters. +.PP +Each dynamically-loadable encoding is represented as a text file. The +initial line of the file, beginning with a ``#'' symbol, is a comment +that provides a human-readable description of the file. The next line +identifies the type of encoding file. It can be one of the following +letters: +.IP "[1] \fBS\fR" +A single-byte encoding, where one character is always one byte long in the +encoding. An example is \fBiso8859-1\fR, used by many European languages. +.IP "[2] \fBD\fR" +A double-byte encoding, where one character is always two bytes long in the +encoding. An example is \fBbig5\fR, used for Chinese text. +.IP "[3] \fBM\fR" +A multi-byte encoding, where one character may be either one or two bytes long. 
+Certain bytes are lead bytes, indicating that another byte must follow +and that together the two bytes represent one character. Other bytes are not +lead bytes and represent themselves. An example is \fBshiftjis\fR, used by +many Japanese computers. +.IP "[4] \fBE\fR" +An escape-sequence encoding, specifying that certain sequences of bytes +do not represent characters, but commands that describe how following bytes +should be interpreted. +.PP +The rest of the lines in the file depend on the type. +.PP +Cases [1], [2], and [3] are collectively referred to as table-based encoding +files. The lines in a table-based encoding file are in the same +format as this example taken from the \fBshiftjis\fR encoding (this is not +the complete file): +.CS +\fL +# Encoding file: shiftjis, multi-byte +M +003F 0 40 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D203E007F +0080000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F +FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F +FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F +FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +81 +0000000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +300030013002FF0CFF0E30FBFF1AFF1BFF1FFF01309B309C00B4FF4000A8FF3E +FFE3FF3F30FD30FE309D309E30034EDD30053006300730FC20152010FF0F005C +301C2016FF5C2026202520182019201C201DFF08FF0930143015FF3BFF3DFF5B +FF5D30083009300A300B300C300D300E300F30103011FF0B221200B100D70000 +00F7FF1D2260FF1CFF1E22662267221E22342642264000B0203220332103FFE5 +FF0400A200A3FF05FF03FF06FF0AFF2000A72606260525CB25CF25CE25C725C6 +25A125A025B325B225BD25BC203B301221922190219121933013000000000000 +000000000000000000000000000000002208220B2286228722822283222A2229 +000000000000000000000000000000002227222800AC21D221D4220022030000 +0000000000000000000000000000000000000000222022A52312220222072261 +2252226A226B221A223D221D2235222B222C0000000000000000000000000000 +212B2030266F266D266A2020202100B6000000000000000025EF000000000000 +.CE +.PP +The third line of the file is three numbers. The first number is the +fallback character (in base 16) to use when converting from UTF-8 to this +encoding. The second number is a \fB1\fR if this file represents the +encoding for a symbol font, or \fB0\fR otherwise. The last number (in base +10) is how many pages of data follow. +.PP +Subsequent lines in the example above are pages that describe how to map +from the encoding into 2-byte Unicode. The first line in a page identifies +the page number. Following it are 256 double-byte numbers, arranged as 16 +rows of 16 numbers. Given a character in the encoding, the high byte of +that character is used to select which page, and the low byte of that +character is used as an index to select one of the double-byte numbers in +that page \- the value obtained being the corresponding Unicode character. 
+By examination of the example above, one can see that the characters 0x7E +and 0x8163 in \fBshiftjis\fR map to 203E and 2026 in Unicode, respectively. +.PP +Following the first page will be all the other pages, each in the same +format as the first: one number identifying the page followed by 256 +double-byte Unicode characters. If a character in the encoding maps to the +Unicode character 0000, it means that the character doesn't actually exist. +If all characters on a page would map to 0000, that page can be omitted. +.PP +Case [4] is the escape-sequence encoding file. The lines in this type of +file are in the same format as this example taken from the \fBiso2022-jp\fR +encoding: +.CS +\fL +.ta 1.5i +# Encoding file: iso2022-jp, escape-driven +E +init {} +final {} +iso8859-1 \\x1b(B +jis0201 \\x1b(J +jis0208 \\x1b$@ +jis0208 \\x1b$B +jis0212 \\x1b$(D +gb2312 \\x1b$A +ksc5601 \\x1b$(C +.CE +.PP +In the file, the first column represents an option and the second column +is the associated value. \fBinit\fR is a string to emit or expect before +the first character is converted, while \fBfinal\fR is a string to emit +or expect after the last character. All other options are names of +table-based encodings; the associated value is the escape-sequence that +marks that encoding. Tcl syntax is used for the values; in the above +example, for instance, ``\fB{}\fR'' represents the empty string and +``\fB\\x1b\fR'' represents character 27. +.PP +When \fBTcl_GetEncoding\fR encounters an encoding \fIname\fR that has not +been loaded, it attempts to load an encoding file called \fIname\fB.enc\fR +from the \fBencoding\fR subdirectory of each directory specified in the +library path \fB$tcl_libPath\fR. If the encoding file exists, but is +malformed, an error message will be left in \fIinterp\fR. 
+.SH KEYWORDS +utf, encoding, convert + + + diff --git a/doc/Eval.3 b/doc/Eval.3 index f100697..3054a44 100644 --- a/doc/Eval.3 +++ b/doc/Eval.3 @@ -5,88 +5,171 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) Eval.3 1.21 97/01/22 14:22:03 +'\" SCCS: @(#) Eval.3 1.24 97/10/21 11:04:19 '\" .so man.macros -.TH Tcl_Eval 3 7.0 Tcl "Tcl Library Procedures" +.TH Tcl_Eval 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME -Tcl_Eval, Tcl_VarEval, Tcl_EvalFile, Tcl_GlobalEval \- execute Tcl commands +Tcl_EvalObj, Tcl_EvalFile, Tcl_EvalObjv, Tcl_Eval, Tcl_Eval2, Tcl_GlobalEval, Tcl_VarEval \- execute Tcl scripts .SH SYNOPSIS .nf \fB#include <tcl.h>\fR .sp +.VS int -\fBTcl_Eval\fR(\fIinterp, cmd\fR) +\fBTcl_EvalObj\fR(\fIinterp, objPtr, flags\fR) .sp int -\fBTcl_VarEval\fR(\fIinterp, string, string, ... \fB(char *) NULL\fR) +\fBTcl_EvalFile\fR(\fIinterp, fileName\fR) .sp int -\fBTcl_EvalFile\fR(\fIinterp, fileName\fR) +\fBTcl_EvalObjv\fR(\fIinterp, objc, objv, command, numBytes, flags\fR) +.sp +int +\fBTcl_Eval\fR(\fIinterp, script\fR) +.sp +int +\fBTcl_Eval2\fR(\fIinterp, script, numBytes, flags\fR) .sp int -\fBTcl_GlobalEval\fR(\fIinterp, cmd\fR) +\fBTcl_GlobalEval\fR(\fIinterp, script\fR) +.sp +int +\fBTcl_VarEval\fR(\fIinterp, string, string, ... \fB(char *) NULL\fR) .SH ARGUMENTS .AS Tcl_Interp **termPtr; .AP Tcl_Interp *interp in -Interpreter in which to execute the command. -A string result will be stored in \fIinterp->result\fR. -.AP char *cmd in -Command (or sequence of commands) to execute. Must be in writable -memory (\fBTcl_Eval\fR makes temporary modifications to the command). -.AP char *string in -String forming part of Tcl command. +Interpreter in which to execute the script. The interpreter's result is +modified to hold the result or error message from the script. +.AP Tcl_Obj *objPtr in +A Tcl object containing the script to execute. 
+.AP int flags in +ORed combination of flag bits that specify additional options. +\fBTCL_EVAL_GLOBAL\fR and \fBTCL_EVAL_DIRECT\fR are currently supported. .AP char *fileName in -Name of file containing Tcl command string. +Name of a file containing a Tcl script. +.AP int objc in +The number of objects in the array pointed to by \fIobjv\fR; +this is also the number of words in the command. +.AP Tcl_Obj **objv in +Points to an array of pointers to objects; each object holds the +value of a single word in the command to execute. +.AP char *command in +Points to the beginning of the string representation of the +command, if there is one. If the string representation of the +command is unknown then an empty string should be supplied. +This information is used for command tracing. +.AP int numBytes in +The number of bytes in \fIcommand\fR or \fIscript\fR, not including any +null terminating character. If \-1, then all characters up to the +first null byte are used. +.AP char *script in +Points to first byte of script to execute. This script must be in +writable memory: temporary modifications are made to it during +parsing. +.AP char *string in +String forming part of a Tcl script. .BE .SH DESCRIPTION .PP -All four of these procedures execute Tcl commands. -\fBTcl_Eval\fR is the core procedure and is used by all the others. -It executes the commands in the script held by \fIcmd\fR -until either an error occurs or it reaches the end of the script. +The procedures described here are invoked to execute Tcl scripts in +various forms. +\fBTcl_EvalObj\fR is the core procedure and is used by many of the others. +It executes the commands in the script stored in \fIobjPtr\fR +until either an error occurs or the end of the script is reached. +If this is the first time \fIobjPtr\fR has been executed, +its commands are compiled into bytecode instructions +which are then executed. 
The +bytecodes are saved in \fIobjPtr\fR so that the compilation step +can be skipped if the object is evaluated again in the future. +.PP +The return value from \fBTcl_EvalObj\fR (and all the other procedures +described here) is a Tcl completion code with +one of the values \fBTCL_OK\fR, \fBTCL_ERROR\fR, \fBTCL_RETURN\fR, +\fBTCL_BREAK\fR, or \fBTCL_CONTINUE\fR. +In addition, a result value or error message is left in \fIinterp\fR's +result; it can be retrieved using \fBTcl_GetObjResult\fR. .PP -Note that \fBTcl_Eval\fR and \fBTcl_GlobalEval\fR -have been largely replaced by the -object-based procedures \fBTcl_EvalObj\fR and \fBTcl_GlobalEvalObj\fR. -Those object-based procedures evaluate a script held in a Tcl object -instead of a string. -The object argument can retain the bytecode instructions for the script -and so avoid reparsing the script each time it is executed. -\fBTcl_Eval\fR is implemented using \fBTcl_EvalObj\fR -but is slower because it must reparse the script each time -since there is no object to retain the bytecode instructions. +\fBTcl_EvalFile\fR reads the file given by \fIfileName\fR and evaluates +its contents as a Tcl script. It returns the same information as +\fBTcl_EvalObj\fR. +If the file couldn't be read then a Tcl error is returned to describe +why the file couldn't be read. +.PP +\fBTcl_EvalObjv\fR executes a single pre-parsed command instead of a +script. The \fIobjc\fR and \fIobjv\fR arguments contain the values +of the words for the Tcl command, one word in each object in +\fIobjv\fR. \fBTcl_EvalObjv\fR evaluates the command and returns +a completion code and result just like \fBTcl_EvalObj\fR. +The \fIcommand\fR argument is used only to provide contextual information +to command traces. Note: unlike the other procedures described here, +\fBTcl_EvalObjv\fR does not add any information to the \fBerrorInfo\fR +variable after an error. It is up to the caller to do this, if it +wishes. 
.PP -The return value from \fBTcl_Eval\fR is one of the Tcl return codes -\fBTCL_OK\fR, \fBTCL_ERROR\fR, \fBTCL_RETURN\fR, \fBTCL_BREAK\fR, or -\fBTCL_CONTINUE\fR, and \fIinterp->result\fR will point to -a string with additional information (a result value or error message). -If an error occurs during compilation, this return information -describes the error. -Otherwise, this return information corresponds to the last command -executed from \fIcmd\fR. +\fBTcl_Eval\fR is similar to \fBTcl_EvalObj\fR except that +the script to be executed is supplied as a string instead of an +object and no compilation occurs. The string is parsed and executed +directly (using \fBTcl_EvalObjv\fR) instead of compiling it and +executing the bytecodes. In situations where it is known that the +script will never be executed again, \fBTcl_Eval\fR may be +faster than \fBTcl_EvalObj\fR. \fBTcl_Eval\fR returns a completion +code and result just like \fBTcl_EvalObj\fR. Note: for backward +compatibility with versions before Tcl 8.0, \fBTcl_Eval\fR +copies the object result in \fIinterp\fR to \fIinterp->result\fR +where it can be accessed directly. This makes \fBTcl_Eval\fR somewhat +slower than \fBTcl_Eval2\fR, which doesn't do the copy. +.PP +\fBTcl_Eval2\fR is an extended version of \fBTcl_Eval\fR that takes +additional arguments \fInumBytes\fR and \fIflags\fR. For the +efficiency reason given above, \fBTcl_Eval2\fR is generally preferred +over \fBTcl_Eval\fR. +.PP +\fBTcl_GlobalEval\fR is an older procedure that is now deprecated. +It is similar to \fBTcl_Eval\fR except that the script is evaluated in +the global namespace and its variable context consists of global +variables only (it ignores any Tcl procedures that are active). +Like \fBTcl_Eval\fR, it leaves a null-terminated +string version of the result in \fIinterp->result\fR where it can +be accessed directly. 
.PP \fBTcl_VarEval\fR takes any number of string arguments of any length, concatenates them into a single string, then calls \fBTcl_Eval\fR to execute that string as a Tcl command. It returns the result of the command and also modifies -\fIinterp->result\fR in the usual fashion for Tcl commands. +\fIinterp->result\fR in the same way as \fBTcl_Eval\fR. The last argument to \fBTcl_VarEval\fR must be NULL to indicate the end -of arguments. -.PP -\fBTcl_EvalFile\fR reads the file given by \fIfileName\fR and evaluates -its contents as a Tcl command by calling \fBTcl_Eval\fR. It returns -a standard Tcl result that reflects the result of evaluating the file. -If the file couldn't be read then a Tcl error is returned to describe -why the file couldn't be read. +of arguments. \fBTcl_VarEval\fR is now deprecated. + +.SH "FLAG BITS" +Any ORed combination of the following values may be used for the +\fIflags\fR argument to procedures such as \fBTcl_EvalObj\fR: +.TP 23 +\fBTCL_EVAL_DIRECT\fR +This flag is only used by \fBTcl_EvalObj\fR; it is ignored by +other procedures. If this flag bit is set, the script is not +compiled to bytecodes; instead it is executed directly +as is done by \fBTcl_Eval2\fR. The +\fBTCL_EVAL_DIRECT\fR flag is useful in situations where the +contents of an object are going to change immediately, so the +bytecodes won't be reused in a future execution. In this case, +it's faster to execute the script directly. +.TP 23 +\fBTCL_EVAL_GLOBAL\fR +If this flag is set, the script is processed at global level. This +means that it is evaluated in the global namespace and its variable +context consists of global variables only (it ignores any Tcl +procedures that are active). + +.SH "MISCELLANEOUS DETAILS" .PP During the processing of a Tcl command it is legal to make nested calls to evaluate other commands (this is how procedures and some control structures are implemented). 
If a code other than \fBTCL_OK\fR is returned -from a nested \fBTcl_Eval\fR invocation, +from a nested \fBTcl_EvalObj\fR invocation, then the caller should normally return immediately, passing that same return code back to its caller, and so on until the top-level application is reached. @@ -94,21 +177,18 @@ A few commands, like \fBfor\fR, will check for certain return codes, like \fBTCL_BREAK\fR and \fBTCL_CONTINUE\fR, and process them specially without returning. .PP -\fBTcl_Eval\fR keeps track of how many nested \fBTcl_Eval\fR +\fBTcl_EvalObj\fR keeps track of how many nested \fBTcl_EvalObj\fR invocations are in progress for \fIinterp\fR. If a code of \fBTCL_RETURN\fR, \fBTCL_BREAK\fR, or \fBTCL_CONTINUE\fR is -about to be returned from the topmost \fBTcl_Eval\fR +about to be returned from the topmost \fBTcl_EvalObj\fR invocation for \fIinterp\fR, it converts the return code to \fBTCL_ERROR\fR -and sets \fIinterp->result\fR -to point to an error message indicating that +and sets \fIinterp\fR's result to an error message indicating that the \fBreturn\fR, \fBbreak\fR, or \fBcontinue\fR command was invoked in an inappropriate place. This means that top-level applications should never see a return code -from \fBTcl_Eval\fR other then \fBTCL_OK\fR or \fBTCL_ERROR\fR. - -.SH "SEE ALSO" -Tcl_EvalObj, Tcl_GlobalEvalObj +from \fBTcl_EvalObj\fR other than \fBTCL_OK\fR or \fBTCL_ERROR\fR. +.VE .SH KEYWORDS -command, execute, file, global, object, object result, variable +execute, file, global, object, result, script diff --git a/doc/EvalObj.3 b/doc/EvalObj.3 deleted file mode 100644 index 8cb8f82..0000000 --- a/doc/EvalObj.3 +++ /dev/null @@ -1,91 +0,0 @@ -'\" -'\" Copyright (c) 1996-1997 Sun Microsystems, Inc. -'\" -'\" See the file "license.terms" for information on usage and redistribution -'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
-'\" -'\" SCCS: @(#) EvalObj.3 1.4 97/01/22 15:18:44 -'\" -.so man.macros -.TH Tcl_EvalObj 3 8.0 Tcl "Tcl Library Procedures" -.BS -.SH NAME -Tcl_EvalObj, Tcl_GlobalEvalObj \- execute Tcl commands -.SH SYNOPSIS -.nf -\fB#include \fR -.sp -int -\fBTcl_EvalObj\fR(\fIinterp, objPtr\fR) -.sp -int -\fBTcl_GlobalEvalObj\fR(\fIinterp, objPtr\fR) -.SH ARGUMENTS -.AS Tcl_Interp **termPtr; -.AP Tcl_Interp *interp in -Interpreter in which to execute the command. -The command's result will be stored in the interpreter's result object -and can be retrieved using \fBTcl_GetObjResult\fR. -.AP Tcl_Obj *objPtr in -A Tcl object containing a command string -(or sequence of commands in a string) to execute. -.BE - -.SH DESCRIPTION -.PP -These two procedures execute Tcl commands. -\fBTcl_EvalObj\fR is the core procedure -and is used by \fBTcl_GlobalEvalObj\fR. -It executes the commands in the script held by \fIobjPtr\fR -until either an error occurs or it reaches the end of the script. -If this is the first time \fIobjPtr\fR has been executed, -its commands are compiled into bytecode instructions -that are then executed if there are no compilation errors. -.PP -The return value from \fBTcl_EvalObj\fR is one of the Tcl return codes -\fBTCL_OK\fR, \fBTCL_ERROR\fR, \fBTCL_RETURN\fR, \fBTCL_BREAK\fR, or -\fBTCL_CONTINUE\fR, -and a result object containing additional information -(a result value or error message) -that can be retrieved using \fBTcl_GetObjResult\fR. -If an error occurs during compilation, this return information -describes the error. -Otherwise, this return information corresponds to the last command -executed from \fIobjPtr\fR. -.PP -\fBTcl_GlobalEvalObj\fR is similar to \fBTcl_EvalObj\fR except that it -processes the command at global level. -This means that the variable context for the command consists of -global variables only (it ignores any Tcl procedure that is active). -This produces an effect similar to the Tcl command ``\fBuplevel 0\fR''. 
-.PP -During the processing of a Tcl command it is legal to make nested -calls to evaluate other commands (this is how procedures and -some control structures are implemented). -If a code other than \fBTCL_OK\fR is returned -from a nested \fBTcl_EvalObj\fR invocation, -then the caller should normally return immediately, -passing that same return code back to its caller, -and so on until the top-level application is reached. -A few commands, like \fBfor\fR, will check for certain -return codes, like \fBTCL_BREAK\fR and \fBTCL_CONTINUE\fR, and process them -specially without returning. -.PP -\fBTcl_EvalObj\fR keeps track of how many nested \fBTcl_EvalObj\fR -invocations are in progress for \fIinterp\fR. -If a code of \fBTCL_RETURN\fR, \fBTCL_BREAK\fR, or \fBTCL_CONTINUE\fR is -about to be returned from the topmost \fBTcl_EvalObj\fR -invocation for \fIinterp\fR, -it converts the return code to \fBTCL_ERROR\fR -and sets the interpreter's result object -to point to an error message indicating that -the \fBreturn\fR, \fBbreak\fR, or \fBcontinue\fR command was -invoked in an inappropriate place. -This means that top-level applications should never see a return code -from \fBTcl_EvalObj\fR other then \fBTCL_OK\fR or \fBTCL_ERROR\fR. - -.SH "SEE ALSO" -Tcl_GetObjResult, Tcl_SetObjResult - -.SH KEYWORDS -command, execute, file, global, object, object result, variable diff --git a/doc/Exit.3 b/doc/Exit.3 index 1d3e26d..22976e0 100644 --- a/doc/Exit.3 +++ b/doc/Exit.3 @@ -4,13 +4,13 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
'\" -'\" SCCS: @(#) Exit.3 1.8 96/12/10 07:37:23 +'\" SCCS: @(#) Exit.3 1.10 98/02/19 16:29:41 '\" .so man.macros -.TH Tcl_Exit 3 7.7 Tcl "Tcl Library Procedures" +.TH Tcl_Exit 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME -Tcl_Exit, Tcl_Finalize, Tcl_CreateExitHandler, Tcl_DeleteExitHandler \- end the application (and invoke exit handlers) +Tcl_Exit, Tcl_Finalize, Tcl_FinalizeThread, Tcl_CreateExitHandler, Tcl_DeleteExitHandler, Tcl_CreateThreadExitHandler, Tcl_DeleteThreadExitHandler \- end the application or thread (and invoke exit handlers) .SH SYNOPSIS .nf \fB#include \fR @@ -22,10 +22,19 @@ Tcl_Exit, Tcl_Finalize, Tcl_CreateExitHandler, Tcl_DeleteExitHandler \- end the \fBTcl_CreateExitHandler\fR(\fIproc, clientData\fR) .sp \fBTcl_DeleteExitHandler\fR(\fIproc, clientData\fR) +.sp +\fBTcl_ExitThread\fR(\fIstatus\fR) +.sp +\fBTcl_FinalizeThread\fR() +.sp +\fBTcl_CreateThreadExitHandler\fR(\fIproc, clientData\fR) +.sp +\fBTcl_DeleteThreadExitHandler\fR(\fIproc, clientData\fR) .SH ARGUMENTS .AS Tcl_ExitProc clientData .AP int status in -Provides information about why application exited. Exact meaning may +Provides information about why the application or thread exited. +Exact meaning may be platform-specific. 0 usually means a normal exit, any nonzero value usually means that an error occurred. .AP Tcl_ExitProc *proc in @@ -51,7 +60,6 @@ otherwise causes the application to terminate without calling \fBTcl_Exit\fR internally invokes the \fBexit\fR system call, thus it never returns control to its caller. .PP -.VS \fBTcl_Finalize\fR is similar to \fBTcl_Exit\fR except that it does not exit from the current process. It is useful for cleaning up when a process is finished using \fBTcl\fR but @@ -64,10 +72,20 @@ However, to ensure portability, your code should always invoke \fBTcl_Finalize\fR when \fBTcl\fR is being unloaded, to ensure that the code will work on all platforms. \fBTcl_Finalize\fR can be safely called more than once. 
+.PP +.VS +\fBTcl_ExitThread\fR is used to terminate the current thread and invoke +per-thread exit handlers. This finalization is done by +\fBTcl_FinalizeThread\fR, which you can call if you just want to clean +up per-thread state and invoke the thread exit handlers. +\fBTcl_Finalize\fR calls \fBTcl_FinalizeThread\fR for the current +thread automatically. .VE .PP \fBTcl_CreateExitHandler\fR arranges for \fIproc\fR to be invoked by \fBTcl_Finalize\fR and \fBTcl_Exit\fR. +\fBTcl_CreateThreadExitHandler\fR arranges for \fIproc\fR to be invoked +by \fBTcl_FinalizeThread\fR and \fBTcl_ExitThread\fR. This provides a hook for cleanup operations such as flushing buffers and freeing global memory. \fIProc\fR should match the type \fBTcl_ExitProc\fR: @@ -76,16 +94,18 @@ typedef void Tcl_ExitProc(ClientData \fIclientData\fR); .CE The \fIclientData\fR parameter to \fIproc\fR is a copy of the \fIclientData\fR argument given to -\fBTcl_CreateExitHandler\fR when the callback +\fBTcl_CreateExitHandler\fR or \fBTcl_CreateThreadExitHandler\fR when +the callback was created. Typically, \fIclientData\fR points to a data structure containing application-specific information about what to do in \fIproc\fR. .PP -\fBTcl_DeleteExitHandler\fR may be called to delete a +\fBTcl_DeleteExitHandler\fR and \fBTcl_DeleteThreadExitHandler\fR may be +called to delete a previously-created exit handler. It removes the handler indicated by \fIproc\fR and \fIclientData\fR so that no call to \fIproc\fR will be made. If no such handler exists then -\fBTcl_DeleteExitHandler\fR does nothing. +\fBTcl_DeleteExitHandler\fR or \fBTcl_DeleteThreadExitHandler\fR does nothing. .PP .VS .PP @@ -98,6 +118,14 @@ If extension \fBA\fR registers its exit handlers before loading extension \fBB\fR, this ensures that any exit handlers for \fBB\fR will be executed before the exit handlers for \fBA\fR. 
.VE +.VS +.PP +\fBTcl_Finalize\fR and \fBTcl_Exit\fR call \fBTcl_FinalizeThread\fR +and the thread exit handlers \fIafter\fR +the process-wide exit handlers. This is because thread finalization shuts +down the I/O channel system, so any attempt at I/O by the global exit +handlers will vanish into the bitbucket. +.VE .SH KEYWORDS -callback, cleanup, dynamic loading, end application, exit, unloading +callback, cleanup, dynamic loading, end application, exit, unloading, thread diff --git a/doc/GetIndex.3 b/doc/GetIndex.3 index 9ca7927..3879103 100644 --- a/doc/GetIndex.3 +++ b/doc/GetIndex.3 @@ -4,10 +4,10 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) @(#) GetIndex.3 1.3 97/07/30 16:21:05 +'\" SCCS: @(#) @(#) GetIndex.3 1.4 97/10/16 17:43:40 '\" .so man.macros -.TH Tcl_GetIndexFromObj 3 8.0 Tcl "Tcl Library Procedures" +.TH Tcl_GetIndexFromObj 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME Tcl_GetIndexFromObj \- lookup string in table of keywords @@ -16,7 +16,14 @@ Tcl_GetIndexFromObj \- lookup string in table of keywords \fB#include \fR .sp int -\fBTcl_GetIndexFromObj\fR(\fIinterp, objPtr, tablePtr, msg, flags, indexPtr\fR) +\fBTcl_GetIndexFromObj\fR(\fIinterp, objPtr, tablePtr, msg, flags, +indexPtr\fR) +.VS +.sp +int +\fBTcl_GetIndexFromObjStruct\fR(\fIinterp, objPtr, tablePtr, offset, +msg, flags, indexPtr\fR) +.VE .SH ARGUMENTS .AS Tcl_Interp **tablePtr .AP Tcl_Interp *interp in @@ -29,6 +36,11 @@ table entry. .AP char **tablePtr in An array of null-terminated strings. The end of the array is marked by a NULL string pointer. +.VS +.AP int offset in +The offset to add to tablePtr to get to the next string in the +list. The end of the array is marked by a NULL string pointer. +.VE .AP char *msg in Null-terminated string describing what is being looked up, such as \fBoption\fR. This string is included in error messages. 
@@ -68,7 +80,18 @@ is invoked again with the same \fIobjPtr\fR and \fItablePtr\fR arguments (e.g. during a reinvocation of a Tcl command), it returns the matching index immediately without having to redo the lookup operation. Note: \fBTcl_GetIndexFromObj\fR assumes that the entries -in \fItablePtr\fR are static: they must not change between invocations. +in \fItablePtr\fR are static: they must not change between +invocations. +.VS +.PP +\fBTcl_GetIndexFromObjStruct\fR works just like +\fBTcl_GetIndexFromObj\fR, except that instead of treating +\fItablePtr\fR as an array of string pointers, it treats it as the +first in a series of string ptrs that are spaced apart by \fIoffset\fR +bytes. This is particularly useful when processing things like +\fBTk_ConfigurationSpec\fR, whose string keys are in the same place in +each of several array elements. +.VE .SH "SEE ALSO" Tcl_WrongNumArgs diff --git a/doc/ObjSetVar.3 b/doc/ObjSetVar.3 deleted file mode 100644 index 49dd82d..0000000 --- a/doc/ObjSetVar.3 +++ /dev/null @@ -1,162 +0,0 @@ -'\" -'\" Copyright (c) 1996-1997 Sun Microsystems, Inc. -'\" -'\" See the file "license.terms" for information on usage and redistribution -'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" -'\" SCCS: @(#) ObjSetVar.3 1.6 97/05/19 17:35:44 -'\" -.so man.macros -.TH Tcl_ObjSetVar2 3 8.0 Tcl "Tcl Library Procedures" -.BS -.SH NAME -Tcl_ObjSetVar2, Tcl_ObjGetVar2 \- manipulate Tcl variables -.SH SYNOPSIS -.nf -\fB#include \fR -.sp -Tcl_Obj * -\fBTcl_ObjSetVar2\fR(\fIinterp, part1Ptr, part2Ptr, newValuePtr, flags\fR) -.sp -Tcl_Obj * -\fBTcl_ObjGetVar2\fR(\fIinterp, part1Ptr, part2Ptr, flags\fR) -.SH ARGUMENTS -.AS Tcl_Interp *newValuePtr -.AP Tcl_Interp *interp in -Interpreter containing variable. -.AP Tcl_Obj *part1Ptr in -Points to a Tcl object containing the variable's name. -The name may include a series of \fB::\fR namespace qualifiers -to specify a variable in a particular namespace. 
-May refer to a scalar variable or an element of an array variable. -.AP Tcl_Obj *part2Ptr in -If non-NULL, points to an object containing the name of an element -within an array and \fIpart1Ptr\fR must refer to an array variable. -.AP Tcl_Obj *newValuePtr in -Points to a Tcl object containing the new value for the variable. -.AP int flags in -OR-ed combination of bits providing additional information for -operation. See below for valid values. -.BE - -.SH DESCRIPTION -.PP -These two procedures may be used to read and modify -Tcl variables from C code. -\fBTcl_ObjSetVar2\fR will create a new variable or modify an existing one. -It sets the specified variable to -the object referenced by \fInewValuePtr\fR -and returns a pointer to the object which is the variable's new value. -The returned object may not be the same one -referenced by \fInewValuePtr\fR; -this might happen because variable traces may modify the variable's value. -The reference count for the variable's old value is decremented -and the reference count for its new value is incremented. -If the new value for the variable -is not the same one referenced by \fInewValuePtr\fR -(perhaps as a result of a variable trace), -then \fInewValuePtr\fR's reference count is left unchanged. -The reference count for the returned object is not incremented -to reflect the returned reference. -If the caller needs to keep a reference to the object, -say in a data structure, -it must increment its reference count using \fBTcl_IncrRefCount\fR. -If an error occurs in setting the variable -(e.g. an array variable is referenced -without giving an index into the array), -then NULL is returned. -.PP -The variable name specified to \fBTcl_ObjSetVar2\fR consists of two parts. -\fIpart1Ptr\fR contains the name of a scalar or array variable. -If \fIpart2Ptr\fR is NULL, the variable must be a scalar. -If \fIpart2Ptr\fR is not NULL, -it contains the name of an element in the array named by \fIpart2Ptr\fR. 
-As a special case, if the flag TCL_PARSE_PART1 is specified, -\fIpart1Ptr\fR may contain both an array and an element name: -if the name contains an open parenthesis and ends with a -close parenthesis, then the value between the parentheses is -treated as an element name (which can have any string value) and -the characters before the first open -parenthesis are treated as the name of an array variable. -If the flag TCL_PARSE_PART1 is given, -\fIpart2Ptr\fR should be NULL since the array and element names -are taken from \fIpart2Ptr\fR. -.PP -The \fIflags\fR argument may be used to specify any of several -options to the procedures. -It consists of an OR-ed combination of any of the following -bits: -.TP -\fBTCL_GLOBAL_ONLY\fR -Under normal circumstances the procedures look up variables as follows: -If a procedure call is active in \fIinterp\fR, -a variable is looked up at the current level of procedure call. -Otherwise, a variable is looked up first in the current namespace, -then in the global namespace. -However, if this bit is set in \fIflags\fR then the variable -is looked up only in the global namespace -even if there is a procedure call active. -If both \fBTCL_GLOBAL_ONLY\fR and \fBTCL_NAMESPACE_ONLY\fR are given, -\fBTCL_GLOBAL_ONLY\fR is ignored. -.TP -\fBTCL_NAMESPACE_ONLY\fR -Under normal circumstances the procedures look up variables as follows: -If a procedure call is active in \fIinterp\fR, -a variable is looked up at the current level of procedure call. -Otherwise, a variable is looked up first in the current namespace, -then in the global namespace. -However, if this bit is set in \fIflags\fR then the variable -is looked up only in the current namespace -even if there is a procedure call active. -.TP -\fBTCL_LEAVE_ERR_MSG\fR -If an error is returned and this bit is set in \fIflags\fR, then -an error message will be left in the interpreter's result, -where it can be retrieved with \fBTcl_GetObjResult\fR -or \fBTcl_GetStringResult\fR. 
-If this flag bit isn't set then no error message is left -and the interpreter's result will not be modified. -.TP -\fBTCL_APPEND_VALUE\fR -If this bit is set then \fInewValuePtr\fR is appended to the current -value, instead of replacing it. -If the variable is currently undefined, then this bit is ignored. -.TP -\fBTCL_LIST_ELEMENT\fR -If this bit is set, then \fInewValuePtr\fR is converted to a valid -Tcl list element before setting (or appending to) the variable. -A separator space is appended before the new list element unless -the list element is going to be the first element in a list or -sublist (i.e. the variable's current value is empty, or contains -the single character ``{'', or ends in `` }''). -.TP -\fBTCL_PARSE_PART1\fR -If this bit is set, -then \fBTcl_ObjGetVar2\fR and \fBTcl_ObjSetVar2\fR -will parse \fIpart1Ptr\fR -to obtain both an array name and an element name. -If the name in \fIpart1Ptr\fR contains an open parenthesis -and ends with a close parenthesis, -the name is treated as the name of an element of an array; -otherwise, the name in \fIpart1Ptr\fR -is interpreted as the name of a scalar variable. -When this bit is set, -\fIpart2Ptr\fR is ignored. -.PP -\fBTcl_ObjGetVar2\fR returns the value of the specified variable. -Its arguments are treated the same way as those for \fBTcl_ObjSetVar2\fR. -It returns a pointer to the object which is the variable's value. -The reference count for the returned object is not incremented. -If the caller needs to keep a reference to the object, -say in a data structure, -it must increment the reference count using \fBTcl_IncrRefCount\fR. -If an error occurs in setting the variable -(e.g. an array variable is referenced -without giving an index into the array), -then NULL is returned. 
- -.SH "SEE ALSO" -Tcl_GetObjResult, Tcl_GetStringResult, Tcl_GetVar, Tcl_GetVar2, Tcl_SetVar, Tcl_SetVar2, Tcl_TraceVar, Tcl_UnsetVar, Tcl_UnsetVar2 - -.SH KEYWORDS -array, interpreter, object, scalar, set, unset, variable diff --git a/doc/Object.3 b/doc/Object.3 index 1fed7a6..df930dd 100644 --- a/doc/Object.3 +++ b/doc/Object.3 @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) @(#) Object.3 1.10 97/07/22 11:40:10 +'\" SCCS: @(#) @(#) Object.3 1.11 97/12/19 11:07:43 '\" .so man.macros .TH Tcl_Obj 3 8.0 Tcl "Tcl Library Procedures" @@ -85,7 +85,7 @@ Because of this representation invalidation and regeneration, it is dangerous for extension writers to access \fBTcl_Obj\fR fields directly. It is better to access Tcl_Obj information using -procedures like \fBTcl_GetStringFromObj\fR. +procedures like \fBTcl_GetStringFromObj\fR and \fBTcl_GetString\fR. .PP Objects are allocated on the heap and are referenced using a pointer to their \fBTcl_Obj\fR structure. @@ -138,7 +138,7 @@ The byte array must always have a null after the last byte, at offset \fIlength\fR; this allows string representations that do not contain nulls to be treated as conventional null-terminated C strings. -C programs use \fBTcl_GetStringFromObj\fR to get +C programs use \fBTcl_GetStringFromObj\fR and \fBTcl_GetString\fR to get an object's string representation. If \fIbytes\fR is NULL, the string representation is invalid. @@ -177,7 +177,8 @@ An object typically starts out containing only a string representation: it is untyped and has a NULL \fItypePtr\fR. An object containing an empty string or a copy of a specified string is created using \fBTcl_NewObj\fR or \fBTcl_NewStringObj\fR respectively. 
-An object's string value is gotten with \fBTcl_GetStringFromObj\fR +An object's string value is gotten with +\fBTcl_GetStringFromObj\fR or \fBTcl_GetString\fR and changed with \fBTcl_SetStringObj\fR. If the object is later passed to a procedure like \fBTcl_GetIntFromObj\fR that requires a specific internal representation, @@ -187,7 +188,7 @@ An object's two representations are duals of each other: changes made to one are reflected in the other. For example, \fBTcl_ListObjReplace\fR will modify an object's internal representation and the next call to \fBTcl_GetStringFromObj\fR -will reflect that change. +or \fBTcl_GetString\fR will reflect that change. .PP Representations are recomputed lazily for efficiency. A change to one representation made by a procedure diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 6cf9b80..a6052eb 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -4,13 +4,13 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) OpenFileChnl.3 1.40 97/09/29 11:22:49 +'\" SCCS: @(#) OpenFileChnl.3 1.43 98/01/13 20:02:00 .so man.macros -.TH Tcl_OpenFileChannel 3 8.0 Tcl "Tcl Library Procedures" +.TH Tcl_OpenFileChannel 3 8.1 Tcl "Tcl Library Procedures" .BS '\" Note: do not modify the .SH NAME line immediately below! 
.SH NAME -Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_Close, Tcl_Read, Tcl_Gets, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_GetChannelOption, Tcl_SetChannelOption \- buffered I/O facilities using channels +Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered \- buffered I/O facilities using channels .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -22,7 +22,7 @@ Tcl_Channel .sp Tcl_Channel \fBTcl_OpenCommandChannel\fR(\fIinterp, argc, argv, flags\fR) -.VS +.VS 8.0 .sp Tcl_Channel \fBTcl_MakeFileChannel\fR(\fIhandle, readOrWrite\fR) @@ -40,17 +40,28 @@ int int \fBTcl_Close\fR(\fIinterp, channel\fR) .sp +.VS 8.1 int -\fBTcl_Read\fR(\fIchannel, buf, toRead\fR) +\fBTcl_ReadChars\fR(\fIchannel, readObjPtr, charsToRead, appendFlag\fR) .sp int -\fBTcl_Gets\fR(\fIchannel, lineRead\fR) +\fBTcl_Read\fR(\fIchannel, readBuf, bytesToRead\fR) .sp int \fBTcl_GetsObj\fR(\fIchannel, lineObjPtr\fR) .sp int -\fBTcl_Write\fR(\fIchannel, buf, toWrite\fR) +\fBTcl_Gets\fR(\fIchannel, lineRead\fR) +.sp +int +\fBTcl_WriteObj\fR(\fIchannel, writeObjPtr\fR) +.sp +int +\fBTcl_WriteChars\fR(\fIchannel, charBuf, bytesToWrite\fR) +.sp +int +\fBTcl_Write\fR(\fIchannel, byteBuf, bytesToWrite\fR) +.VE .sp int \fBTcl_Flush\fR(\fIchannel\fR) @@ -83,37 +94,36 @@ Used for error reporting and to look up a channel registered in it. .AP char *fileName in The name of a local or network file. .AP char *mode in -Specifies how the file is to be accessed. May have any of the -values allowed for the \fImode\fR argument to the Tcl -\fBopen\fR command. -For \fBTcl_OpenCommandChannel\fR, may be NULL. 
+Specifies how the file is to be accessed. May have any of the values +allowed for the \fImode\fR argument to the Tcl \fBopen\fR command. For +\fBTcl_OpenCommandChannel\fR, may be NULL. .AP int permissions in -POSIX-style permission flags such as 0644. -If a new file is created, these permissions will be set on the -created file. +POSIX-style permission flags such as 0644. If a new file is created, these +permissions will be set on the created file. .AP int argc in The number of elements in \fIargv\fR. .AP char **argv in -Arguments for constructing a command pipeline. -These values have the same meaning as the non-switch arguments -to the Tcl \fBexec\fR command. +Arguments for constructing a command pipeline. These values have the same +meaning as the non-switch arguments to the Tcl \fBexec\fR command. .AP int flags in Specifies the disposition of the stdio handles in pipeline: OR-ed -combination of \fBTCL_STDIN\fR, \fBTCL_STDOUT\fR, \fBTCL_STDERR\fR, -and \fBTCL_ENFORCE_MODE\fR. If \fBTCL_STDIN\fR is set, stdin for -the first child in the pipe is the pipe channel, otherwise it is the same -as the standard input of the invoking process; likewise for -\fBTCL_STDOUT\fR and \fBTCL_STDERR\fR. If \fBTCL_ENFORCE_MODE\fR is not set, -then the pipe can redirect stdio handles to override the stdio handles for -which \fBTCL_STDIN\fR, \fBTCL_STDOUT\fR and \fBTCL_STDERR\fR have been set. -If it is set, then such redirections cause an error. -.VS +combination of \fBTCL_STDIN\fR, \fBTCL_STDOUT\fR, \fBTCL_STDERR\fR, and +\fBTCL_ENFORCE_MODE\fR. If \fBTCL_STDIN\fR is set, stdin for the first child +in the pipe is the pipe channel, otherwise it is the same as the standard +input of the invoking process; likewise for \fBTCL_STDOUT\fR and +\fBTCL_STDERR\fR. If \fBTCL_ENFORCE_MODE\fR is not set, then the pipe can +redirect stdio handles to override the stdio handles for which +\fBTCL_STDIN\fR, \fBTCL_STDOUT\fR and \fBTCL_STDERR\fR have been set. 
If it +is set, then such redirections cause an error. +.VS 8.0 .AP ClientData handle in Operating system specific handle for I/O to a file. For Unix this is a file descriptor, for Windows it is a HANDLE. .AP int readOrWrite in OR-ed combination of \fBTCL_READABLE\fR and \fBTCL_WRITABLE\fR to indicate what operations are valid on \fIhandle\fR. +.AP char *channelName in +The name of the channel. .VE .AP int *modePtr out Points at an integer variable that will receive an OR-ed combination of @@ -122,26 +132,40 @@ open for reading and writing. .AP Tcl_Channel channel in A Tcl channel for input or output. Must have been the return value from a procedure such as \fBTcl_OpenFileChannel\fR. -.AP char *buf in -An array of bytes in which to store channel input, or from which -to read channel output. -.AP int len in -The length of the input or output. -.AP int atEnd in -If nonzero, store the input at the end of the input queue, otherwise store -it at the head of the input queue. -.AP int toRead in -The number of bytes to read from the channel. -.AP Tcl_DString *lineRead in -A pointer to a Tcl dynamic string in which to store the line read from the -channel. Must have been initialized by the caller. The line read -will be appended to any data already in the dynamic string. -.AP Tcl_Obj *linePtrObj in +.VS 8.1 br +.AP Tcl_Obj *readObjPtr in/out +A pointer to a Tcl Object in which to store the characters read from the +channel. +.AP int charsToRead in +The number of characters to read from the channel. If the channel's encoding +is \fBbinary\fR, this is equivalent to the number of bytes to read from the +channel. +.AP int appendFlag in +If non-zero, data read from the channel will be appended to the object. +Otherwise, the data will replace the existing contents of the object. +.AP char *readBuf out +A buffer in which to store the bytes read from the channel. +.AP int bytesToRead in +The number of bytes to read from the channel. 
The buffer \fIreadBuf\fR must +be large enough to hold this many bytes. +.AP Tcl_Obj *lineObjPtr in/out A pointer to a Tcl object in which to store the line read from the channel. The line read will be appended to the current value of the object. -.AP int toWrite in -The number of bytes to read from \fIbuf\fR and output to the channel. +.AP Tcl_DString *lineRead in/out +A pointer to a Tcl dynamic string in which to store the line read from the +channel. Must have been initialized by the caller. The line read will be +appended to any data already in the dynamic string. +.AP Tcl_Obj *writeObjPtr in +A pointer to a Tcl Object whose contents will be output to the channel. +.AP "CONST char" *charBuf in +A buffer containing the characters to output to the channel. +.AP char *byteBuf in +A buffer containing the bytes to output to the channel. +.AP int bytesToWrite in +The number of bytes to consume from \fIcharBuf\fR or \fIbyteBuf\fR and +output to the channel. +.VE .AP int offset in How far to move the access point in the channel at which the next input or output operation will be applied, measured in bytes from the position @@ -169,7 +193,7 @@ types. The channel mechanism is extensible to new channel types, by providing a low level channel driver for the new type; the channel driver interface is described in the manual entry for \fBTcl_CreateChannel\fR. The -channel mechanism provides a buffering scheme modelled after +channel mechanism provides a buffering scheme modeled after Unix's standard I/O, and it also allows for nonblocking I/O on channels. .PP @@ -182,7 +206,7 @@ channels, see the manual entry for \fBTcl_CreateChannel\fR. .PP \fBTcl_OpenFileChannel\fR opens a file specified by \fIfileName\fR and returns a channel handle that can be used to perform input and output on -the file. This API is modelled after the \fBfopen\fR procedure of +the file. This API is modeled after the \fBfopen\fR procedure of the Unix standard I/O library. 
The syntax and meaning of all arguments is similar to those given in the Tcl \fBopen\fR command when opening a file. @@ -190,7 +214,7 @@ If an error occurs while opening the channel, \fBTcl_OpenFileChannel\fR returns NULL and records a POSIX error code that can be retrieved with \fBTcl_GetErrno\fR. In addition, if \fIinterp\fR is non-NULL, \fBTcl_OpenFileChannel\fR -leaves an error message in \fIinterp->result\fR after any error. +leaves an error message in \fIinterp\fR's result after any error. .PP The newly created channel is not registered in the supplied interpreter; to register it, use \fBTcl_RegisterChannel\fR, described below. @@ -310,93 +334,136 @@ left in \fIinterp->result\fR. .PP Note: it is not safe to call \fBTcl_Close\fR on a channel that has been registered using \fBTcl_RegisterChannel\fR; see the documentation for -\fBTcl_RegisterChannel\fR, above, for details. If the channel has ever been -given as the \fBchan\fR argument in a call to \fBTcl_RegisterChannel\fR, -you should instead use \fBTcl_UnregisterChannel\fR, which will internally -call \fBTcl_Close\fR when all calls to \fBTcl_RegisterChannel\fR have been -matched by corresponding calls to \fBTcl_UnregisterChannel\fR. +\fBTcl_RegisterChannel\fR, above, for details. If the channel has ever +been given as the \fBchan\fR argument in a call to +\fBTcl_RegisterChannel\fR, you should instead use +\fBTcl_UnregisterChannel\fR, which will internally call \fBTcl_Close\fR +when all calls to \fBTcl_RegisterChannel\fR have been matched by +corresponding calls to \fBTcl_UnregisterChannel\fR. -.SH TCL_READ -.PP -\fBTcl_Read\fR consumes up to \fItoRead\fR bytes of data from -\fIchannel\fR and stores it at \fIbuf\fR. -The return value of \fBTcl_Read\fR is the number of characters written -at \fIbuf\fR. -The buffer produced by \fBTcl_Read\fR is not NULL terminated. Its contents -are valid from the zeroth position up to and excluding the position -indicated by the return value. 
-If an error occurs, the return value is -1 and \fBTcl_Read\fR records -a POSIX error code that can be retrieved with \fBTcl_GetErrno\fR. -.PP -The return value may be smaller than the value of \fItoRead\fR, indicating -that less data than requested was available, also called a \fIshort -read\fR. -In blocking mode, this can only happen on an end-of-file. -In nonblocking mode, a short read can also occur if there is not -enough input currently available: \fBTcl_Read\fR returns a short -count rather than waiting for more data. -.PP -If the channel is in blocking mode, a return value of zero indicates an end -of file condition. If the channel is in nonblocking mode, a return value of -zero indicates either that no input is currently available or an end of -file condition. Use \fBTcl_Eof\fR and \fBTcl_InputBlocked\fR -to tell which of these conditions actually occurred. -.PP -\fBTcl_Read\fR translates platform-specific end-of-line representations -into the canonical \fB\en\fR internal representation according to the -current end-of-line recognition mode. End-of-line recognition and the -various platform-specific modes are described in the manual entry for the -Tcl \fBfconfigure\fR command. +.VS 8.1 br +.SH TCL_READCHARS AND TCL_READ +.PP +\fBTcl_ReadChars\fR consumes bytes from \fIchannel\fR, converting the bytes +to UTF-8 based on the channel's encoding and storing the produced data in +\fIreadObjPtr\fR's string representation. The return value of +\fBTcl_ReadChars\fR is the number of characters, up to \fIcharsToRead\fR, +that were stored in \fIreadObjPtr\fR. If an error occurs while reading, the +return value is \-1 and \fBTcl_ReadChars\fR records a POSIX error code that +can be retrieved with \fBTcl_GetErrno\fR. +.PP +The return value may be smaller than the value to read, indicating that less +data than requested was available. This is called a \fIshort read\fR. In +blocking mode, this can only happen on an end-of-file.
In nonblocking mode, +a short read can also occur if there is not enough input currently +available: \fBTcl_ReadChars\fR returns a short count rather than waiting +for more data. +.PP +If the channel is in blocking mode, a return value of zero indicates an +end-of-file condition. If the channel is in nonblocking mode, a return +value of zero indicates either that no input is currently available or an +end-of-file condition. Use \fBTcl_Eof\fR and \fBTcl_InputBlocked\fR to tell +which of these conditions actually occurred. +.PP +\fBTcl_ReadChars\fR translates the various end-of-line representations into +the canonical \fB\en\fR internal representation according to the current +end-of-line recognition mode. End-of-line recognition and the various +platform-specific modes are described in the manual entry for the Tcl +\fBfconfigure\fR command. +.PP +As a performance optimization, when reading from a channel with the encoding +\fBbinary\fR, the bytes are not converted to UTF-8 as they are read. +Instead, they are stored in \fIreadObjPtr\fR's internal representation as a +byte-array object. The string representation of this object will only be +constructed if it is needed (e.g., because of a call to +\fBTcl_GetStringFromObj\fR). In this way, byte-oriented data can be read +from a channel, manipulated by calling \fBTcl_GetByteArrayFromObj\fR and +related functions, and then written to a channel without the expense of ever +converting to or from UTF-8. +.PP +\fBTcl_Read\fR is similar to \fBTcl_ReadChars\fR, except that it doesn't do +encoding conversions, regardless of the channel's encoding. It is deprecated +and exists for backwards compatibility with non-internationalized Tcl +extensions. It consumes bytes from \fIchannel\fR and stores them in +\fIbuf\fR, performing end-of-line translations on the way. The return value +of \fBTcl_Read\fR is the number of bytes, up to \fItoRead\fR, written in +\fIbuf\fR. The buffer produced by \fBTcl_Read\fR is not NULL terminated. 
+Its contents are valid from the zeroth position up to and excluding the +position indicated by the return value. -.SH TCL_GETS AND TCL_GETSOBJ -.PP -\fBTcl_Gets\fR reads a line of input from a channel and appends all of -the characters of the line except for the terminating end-of-line character(s) -to the dynamic string given by \fIdsPtr\fR. -The end-of-line character(s) are read and discarded. +.SH TCL_GETSOBJ AND TCL_GETS +.PP +\fBTcl_GetsObj\fR consumes bytes from \fIchannel\fR, converting the bytes to +UTF-8 based on the channel's encoding, until a full line of input has been +seen. If the channel's encoding is \fBbinary\fR, each byte read from the +channel is treated as an individual Unicode character. All of the +characters of the line except for the terminating end-of-line character(s) +are appended to \fIlineObjPtr\fR's string representation. The end-of-line +character(s) are read and discarded. +.PP +If a line was successfully read, the return value is greater than or equal +to zero and indicates the number of bytes stored in \fIlineObjPtr\fR. If an +error occurs, \fBTcl_GetsObj\fR returns \-1 and records a POSIX error code +that can be retrieved with \fBTcl_GetErrno\fR. \fBTcl_GetsObj\fR also +returns \-1 if the end of the file is reached; the \fBTcl_Eof\fR procedure +can be used to distinguish an error from an end-of-file condition. +.PP +If the channel is in nonblocking mode, the return value can also be \-1 if +no data was available or the data that was available did not contain an +end-of-line character. When \-1 is returned, the \fBTcl_InputBlocked\fR +procedure may be invoked to determine if the channel is blocked because +of input unavailability. +.PP +\fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting +characters are appended to the dynamic string given by +\fIdsPtr\fR rather than a Tcl object.
+ +.SH TCL_WRITECHARS, TCL_WRITEOBJ, AND TCL_WRITE .PP -If a line was successfully read, the return value is greater than or -equal to zero, and it indicates the number of characters stored -in the dynamic string. -If an error occurs, \fBTcl_Gets\fR returns -1 and records a POSIX error -code that can be retrieved with \fBTcl_GetErrno\fR. -\fBTcl_Gets\fR also returns -1 if the end of the file is reached; -the \fBTcl_Eof\fR procedure can be used to distinguish an error -from an end-of-file condition. -.PP -If the channel is in nonblocking mode, the return value can also -be -1 if no data was available or the data that was available -did not contain an end-of-line character. -When -1 is returned, the \fBTcl_InputBlocked\fR procedure may be -invoked to determine if the channel is blocked because of input -unavailability. -.PP -\fBTcl_GetsObj\fR is the same as \fBTcl_Gets\fR except the resulting -characters are appended to a Tcl object \fBlineObjPtr\fR rather than a -dynamic string. -.SH TCL_WRITE -.PP -\fBTcl_Write\fR accepts \fItoWrite\fR bytes of data at \fIbuf\fR for output -on \fIchannel\fR. This data may not appear on the output device -immediately. If the data should appear immediately, call \fBTcl_Flush\fR -after the call to \fBTcl_Write\fR, or set the \fB-buffering\fR option on -the channel to \fBnone\fR. If you wish the data to appear as soon as an end -of line is accepted for output, set the \fB\-buffering\fR option on the -channel to \fBline\fR mode. -.PP -The \fItoWrite\fR argument specifies how many bytes of data are provided in -the \fIbuf\fR argument. If it is negative, \fBTcl_Write\fR expects the data +\fBTcl_WriteChars\fR accepts \fIbytesToWrite\fR bytes of character data at +\fIcharBuf\fR. The UTF-8 characters in the buffer are converted to the +channel's encoding and queued for output to \fIchannel\fR. If +\fIbytesToWrite\fR is negative, \fBTcl_WriteChars\fR expects \fIcharBuf\fR to be NULL terminated and it outputs everything up to the NULL. 
.PP -The return value of \fBTcl_Write\fR is a count of how many -characters were accepted for output to the channel. This is either equal to -\fItoWrite\fR or -1 to indicate that an error occurred. -If an error occurs, \fBTcl_Write\fR also records a POSIX error code -that may be retrieved with \fBTcl_GetErrno\fR. +Data queued for output may not appear on the output device immediately, due +to internal buffering. If the data should appear immediately, call +\fBTcl_Flush\fR after the call to \fBTcl_WriteChars\fR, or set the +\fB\-buffering\fR option on the channel to \fBnone\fR. If you wish the data +to appear as soon as a complete line is accepted for output, set the +\fB\-buffering\fR option on the channel to \fBline\fR mode. +.PP +The return value of \fBTcl_WriteChars\fR is a count of how many bytes were +accepted for output to the channel. This is either greater than zero to +indicate success or \-1 to indicate that an error occurred. If an error +occurs, \fBTcl_WriteChars\fR records a POSIX error code that may be +retrieved with \fBTcl_GetErrno\fR. .PP Newline characters in the output data are translated to platform-specific -end-of-line sequences according to the \fB\-translation\fR option for -the channel. +end-of-line sequences according to the \fB\-translation\fR option for the +channel. This is done even if the channel has no encoding. +.PP +\fBTcl_WriteObj\fR is similar to \fBTcl_WriteChars\fR except it +accepts a Tcl object whose contents will be output to the channel. The +UTF-8 characters in \fIwriteObjPtr\fR's string representation are converted +to the channel's encoding and queued for output to \fIchannel\fR. +As a performance optimization, when writing to a channel with the encoding +\fBbinary\fR, UTF-8 characters are not converted as they are written. +Instead, the bytes in \fIwriteObjPtr\fR's internal representation as a +byte-array object are written to the channel. The byte-array representation +of the object will be constructed if it is needed. 
In this way, +byte-oriented data can be read from a channel, manipulated by calling +\fBTcl_GetByteArrayFromObj\fR and related functions, and then written to a +channel without the expense of ever converting to or from UTF-8. +.PP +\fBTcl_Write\fR is similar to \fBTcl_WriteChars\fR except that it doesn't do +encoding conversions, regardless of the channel's encoding. It is +deprecated and exists for backwards compatibility with non-internationalized +Tcl extensions. It accepts \fIbytesToWrite\fR bytes of data at +\fIbyteBuf\fR and queues them for output to \fIchannel\fR. If +\fIbytesToWrite\fR is negative, \fBTcl_Write\fR expects \fIbyteBuf\fR to be +NULL terminated and it outputs everything up to the NULL. +.VE .SH TCL_FLUSH .PP @@ -419,14 +486,14 @@ data will be read or written. Buffered output is flushed to the channel and buffered input is discarded, prior to the seek operation. .PP \fBTcl_Seek\fR normally returns the new access point. -If an error occurs, \fBTcl_Seek\fR returns -1 and records a POSIX error +If an error occurs, \fBTcl_Seek\fR returns \-1 and records a POSIX error code that can be retrieved with \fBTcl_GetErrno\fR. After an error, the access point may or may not have been moved. .SH TCL_TELL .PP \fBTcl_Tell\fR returns the current access point for a channel. The returned -value is -1 if the channel does not support seeking. +value is \-1 if the channel does not support seeking. .SH TCL_GETCHANNELOPTION .PP @@ -477,7 +544,7 @@ The call always returns zero if the channel is in blocking mode. buffered in the internal buffers for a channel. If the channel is not open for reading, this function always returns zero. -.VS +.VS 8.0 .SH "PLATFORM ISSUES" .PP The handles returned from \fBTcl_GetChannelHandle\fR depend on the diff --git a/doc/ParseCmd.3 b/doc/ParseCmd.3 new file mode 100644 index 0000000..74cc64e --- /dev/null +++ b/doc/ParseCmd.3 @@ -0,0 +1,426 @@ +'\" +'\" Copyright (c) 1997 Sun Microsystems, Inc. 
+'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. +'\" +'\" SCCS: @(#) ParseCmd.3 1.6 97/12/19 19:11:18 +'\" +.so man.macros +.TH Tcl_ParseCommand 3 8.1 Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_ParseCommand, Tcl_ParseExpr, Tcl_ParseBraces, Tcl_ParseQuotedString, Tcl_ParseVarName, Tcl_FreeParse, Tcl_EvalTokens \- parse Tcl scripts and expressions +.SH SYNOPSIS +.nf +\fB#include <tcl.h>\fR +.sp +int +\fBTcl_ParseCommand\fR(\fIinterp, string, numBytes, nested, parsePtr\fR) +.sp +int +\fBTcl_ParseExpr\fR(\fIinterp, string, numBytes, parsePtr\fR) +.sp +int +\fBTcl_ParseBraces\fR(\fIinterp, string, numBytes, parsePtr, append, termPtr\fR) +.sp +int +\fBTcl_ParseQuotedString\fR(\fIinterp, string, numBytes, parsePtr, append, termPtr\fR) +.sp +int +\fBTcl_ParseVarName\fR(\fIinterp, string, numBytes, parsePtr, append\fR) +.sp +\fBTcl_FreeParse\fR(\fIusedParsePtr\fR) +.sp +Tcl_Obj * +\fBTcl_EvalTokens\fR(\fIinterp, tokenPtr, numTokens\fR) +.SH ARGUMENTS +.AS Tcl_Interp *usedParsePtr +.AP Tcl_Interp *interp out +For procedures other than \fBTcl_FreeParse\fR and \fBTcl_EvalTokens\fR, +used only for error reporting; +if NULL, then no error messages are left after errors. +For \fBTcl_EvalTokens\fR, determines the context for evaluating the +script and also is used for error reporting; must not be NULL. +.AP char *string in +Pointer to first character in string to parse. +.AP int numBytes in +Number of bytes in \fIstring\fR, not including any terminating null +character. If less than 0 then the script consists of all characters +in \fIstring\fR up to the first null character. +.AP int nested in +Non-zero means that the script is part of a command substitution so an +unquoted close bracket should be treated as a command terminator. If zero, +close brackets have no special meaning.
+.AP int append in +Non-zero means that \fI*parsePtr\fR already contains valid tokens; the new +tokens should be appended to those already present. Zero means that +\fI*parsePtr\fR is uninitialized; any information in it is ignored. +This argument is normally 0. +.AP Tcl_Parse *parsePtr out +Points to structure to fill in with information about the parsed +command, expression, variable name, etc. +Any previous information in this structure +is ignored, unless \fIappend\fR is non-zero in a call to +\fBTcl_ParseBraces\fR, \fBTcl_ParseQuotedString\fR, +or \fBTcl_ParseVarName\fR. +.AP char **termPtr out +If not NULL, points to a location where +\fBTcl_ParseBraces\fR and \fBTcl_ParseQuotedString\fR +will store a pointer to the character +just after the terminating close-brace or close-quote (respectively) +if the parse was successful. +.AP Tcl_Parse *usedParsePtr in +Points to structure that was filled in by a previous call to +\fBTcl_ParseCommand\fR, \fBTcl_ParseExpr\fR, \fBTcl_ParseVarName\fR, etc. +.BE + +.SH DESCRIPTION +.PP +These procedures parse Tcl commands or portions of Tcl commands such as +expressions or references to variables. +Each procedure takes a pointer to a script (or portion thereof) +and fills in the structure pointed to by \fIparsePtr\fR +with a collection of tokens describing the information that was parsed. +The procedures normally return \fBTCL_OK\fR. +However, if an error occurs then they return \fBTCL_ERROR\fR, +leave an error message in \fIinterp's\fR result +(if \fIinterp\fR is not NULL), +and leave nothing in \fIparsePtr\fR. +.PP +\fBTcl_ParseCommand\fR is a procedure that parses Tcl +scripts. Given a pointer to a script, it +parses the first command from the script. If the command was parsed +successfully, \fBTcl_ParseCommand\fR returns \fBTCL_OK\fR and fills in the +structure pointed to by \fIparsePtr\fR with information about the +structure of the command (see below for details). 
+If an error occurred in parsing the command then +\fBTCL_ERROR\fR is returned, an error message is left in \fIinterp\fR's +result, and no information is left at \fI*parsePtr\fR. +.PP +\fBTcl_ParseExpr\fR parses Tcl expressions. +Given a pointer to a script containing an expression, +\fBTcl_ParseExpr\fR parses the expression. +If the expression was parsed successfully, +\fBTcl_ParseExpr\fR returns \fBTCL_OK\fR and fills in the +structure pointed to by \fIparsePtr\fR with information about the +structure of the expression (see below for details). +If an error occurred in parsing the expression then +\fBTCL_ERROR\fR is returned, an error message is left in \fIinterp\fR's +result, and no information is left at \fI*parsePtr\fR. +.PP +\fBTcl_ParseBraces\fR parses a string or command argument +enclosed in braces such as +\fB{hello}\fR or \fB{string \\t with \\t tabs}\fR +from the beginning of its argument \fIstring\fR. +The first character of \fIstring\fR must be \fB{\fR. +If the braced string was parsed successfully, +\fBTcl_ParseBraces\fR returns \fBTCL_OK\fR, +fills in the structure pointed to by \fIparsePtr\fR +with information about the structure of the string +(see below for details), +and stores a pointer to the character just after the terminating \fB}\fR +in the location given by \fI*termPtr\fR. +If an error occurs while parsing the string +then \fBTCL_ERROR\fR is returned, +an error message is left in \fIinterp\fR's result, +and no information is left at \fI*parsePtr\fR or \fI*termPtr\fR. +.PP +\fBTcl_ParseQuotedString\fR parses a double-quoted string such as +\fB"sum is [expr $a+$b]"\fR +from the beginning of the argument \fIstring\fR. +The first character of \fIstring\fR must be \fB"\fR.
+If the double-quoted string was parsed successfully, +\fBTcl_ParseQuotedString\fR returns \fBTCL_OK\fR, +fills in the structure pointed to by \fIparsePtr\fR +with information about the structure of the string +(see below for details), +and stores a pointer to the character just after the terminating \fB"\fR +in the location given by \fI*termPtr\fR. +If an error occurs while parsing the string +then \fBTCL_ERROR\fR is returned, +an error message is left in \fIinterp\fR's result, +and no information is left at \fI*parsePtr\fR or \fI*termPtr\fR. +.PP +\fBTcl_ParseVarName\fR parses a Tcl variable reference such as +\fB$abc\fR or \fB$x([expr $index + 1])\fR from the beginning of its +\fIstring\fR argument. +The first character of \fIstring\fR must be \fB$\fR. +If a variable name was parsed successfully, \fBTcl_ParseVarName\fR +returns \fBTCL_OK\fR and fills in the structure pointed to by +\fIparsePtr\fR with information about the structure of the variable name +(see below for details). If an error +occurs while parsing the variable name then \fBTCL_ERROR\fR is returned, an +error message is left in \fIinterp\fR's result (if \fIinterp\fR isn't +NULL), and no information is left at \fI*parsePtr\fR. +.PP +The information left at \fI*parsePtr\fR +by \fBTcl_ParseCommand\fR, \fBTcl_ParseExpr\fR, \fBTcl_ParseBraces\fR, +\fBTcl_ParseQuotedString\fR, and \fBTcl_ParseVarName\fR +may include dynamically allocated memory. +If these five parsing procedures return \fBTCL_OK\fR +then the caller must invoke \fBTcl_FreeParse\fR to release +the storage at \fI*parsePtr\fR. +These procedures ignore any existing information in +\fI*parsePtr\fR (unless \fIappend\fR is non-zero), +so if repeated calls are being made to any of them +then \fBTcl_FreeParse\fR must be invoked once after each call. +.PP +\fBTcl_EvalTokens\fR evaluates a sequence of parse tokens from a Tcl_Parse +structure.
The tokens typically consist +of all the tokens in a word or all the tokens that make up the index for +a reference to an array variable. \fBTcl_EvalTokens\fR performs the +substitutions requested by the tokens, concatenates the +resulting values, and returns the result in a new Tcl_Obj. The +reference count of the object returned as result has been +incremented, so the caller must +invoke \fBTcl_DecrRefCount\fR when it is finished with the object. +If an error occurs while evaluating the tokens (such as a reference to +a non-existent variable) then the return value is NULL and an error +message is left in \fIinterp\fR's result. + +.SH TCL_PARSE STRUCTURE +.PP +\fBTcl_ParseCommand\fR, \fBTcl_ParseExpr\fR, \fBTcl_ParseBraces\fR, +\fBTcl_ParseQuotedString\fR, and \fBTcl_ParseVarName\fR +return parse information in two data structures, Tcl_Parse and Tcl_Token: +.CS +typedef struct Tcl_Parse { + char *\fIcommentStart\fR; + int \fIcommentSize\fR; + char *\fIcommandStart\fR; + int \fIcommandSize\fR; + int \fInumWords\fR; + Tcl_Token *\fItokenPtr\fR; + int \fInumTokens\fR; + ... +} Tcl_Parse; + +typedef struct Tcl_Token { + int \fItype\fR; + char *\fIstart\fR; + int \fIsize\fR; + int \fInumComponents\fR; +} Tcl_Token; +.CE +.PP +The first five fields of a Tcl_Parse structure +are filled in only by \fBTcl_ParseCommand\fR. +These fields are not used by the other parsing procedures. +.PP +\fBTcl_ParseCommand\fR fills in a Tcl_Parse structure +with information that describes one Tcl command and any comments that +precede the command. +If there are comments, +the \fIcommentStart\fR field points to the \fB#\fR character that begins +the first comment and \fIcommentSize\fR indicates the number of bytes +in all of the comments preceding the command, including the newline +character that terminates the last comment. +If the command is not preceded by any comments, \fIcommentSize\fR is 0. 
+\fBTcl_ParseCommand\fR also sets the \fIcommandStart\fR field +to point to the first character of the first +word in the command (skipping any comments and leading space) and +\fIcommandSize\fR gives the total number of bytes in the command, +including the character pointed to by \fIcommandStart\fR up to and +including the newline, close bracket, or semicolon character that +terminates the command. The \fInumWords\fR field gives the +total number of words in the command. +.PP +All parsing procedures set the remaining fields, +\fItokenPtr\fR and \fInumTokens\fR. +The \fItokenPtr\fR field points to the first in an array of Tcl_Token +structures that describe the components of the entity being parsed. +The \fInumTokens\fR field gives the total number of tokens +present in the array. +Each token contains four fields. +The \fItype\fR field selects one of several token types +that are described below. The \fIstart\fR field +points to the first character in the token and the \fIsize\fR field +gives the total number of characters in the token. Some token types, +such as \fBTCL_TOKEN_WORD\fR and \fBTCL_TOKEN_VARIABLE\fR, consist of +several component tokens, which immediately follow the parent token; +the \fInumComponents\fR field describes how many of these there are. +The \fItype\fR field has one of the following values: +.TP 20 +\fBTCL_TOKEN_WORD\fR +This token ordinarily describes one word of a command +but it may also describe a quoted or braced string in an expression. +The token describes a component of the script that is +the result of concatenating together a sequence of subcomponents, +each described by a separate subtoken. +The token starts with the first non-blank +character of the component (which may be a double-quote or open brace) +and includes all characters in the component up to but not including the +space, semicolon, close bracket, close quote, or close brace that +terminates the component. 
The \fInumComponents\fR field counts the total +number of sub-tokens that make up the word, including sub-tokens +of \fBTCL_TOKEN_VARIABLE\fR and \fBTCL_TOKEN_BS\fR tokens. +.TP +\fBTCL_TOKEN_SIMPLE_WORD\fR +This token has the same meaning as \fBTCL_TOKEN_WORD\fR, except that +the word is guaranteed to consist of a single \fBTCL_TOKEN_TEXT\fR +sub-token. The \fInumComponents\fR field is always 1. +.TP +\fBTCL_TOKEN_TEXT\fR +The token describes a range of literal text that is part of a word. +The \fInumComponents\fR field is always 0. +.TP +\fBTCL_TOKEN_BS\fR +The token describes a backslash sequence such as \fB\en\fR or \fB\e0xa3\fR. +The \fInumComponents\fR field is always 0. +.TP +\fBTCL_TOKEN_COMMAND\fR +The token describes a command whose result must be substituted into +the word. The token includes the square brackets that surround the +command. The \fInumComponents\fR field is always 0 (the nested command +is not parsed; call \fBTcl_ParseCommand\fR recursively if you want to +see its tokens). +.TP +\fBTCL_TOKEN_VARIABLE\fR +The token describes a variable substitution, including the +\fB$\fR, variable name, and array index (if there is one) up through the +close parenthesis that terminates the index. This token is followed +by one or more additional tokens that describe the variable name and +array index. If \fInumComponents\fR is 1 then the variable is a +scalar and the next token is a \fBTCL_TOKEN_TEXT\fR token that gives the +variable name. If \fInumComponents\fR is greater than 1 then the +variable is an array: the first sub-token is a \fBTCL_TOKEN_TEXT\fR +token giving the array name and the remaining sub-tokens are +\fBTCL_TOKEN_TEXT\fR, \fBTCL_TOKEN_BS\fR, \fBTCL_TOKEN_COMMAND\fR, and +\fBTCL_TOKEN_VARIABLE\fR tokens that must be concatenated to produce the +array index. The \fInumComponents\fR field includes nested sub-tokens +that are part of \fBTCL_TOKEN_VARIABLE\fR tokens in the array index.
+.TP +\fBTCL_TOKEN_SUB_EXPR\fR +The token describes one subexpression of an expression +(or an entire expression). +A subexpression may consist of a value +such as an integer literal, variable substitution, +or parenthesized subexpression; +it may also consist of an operator and its operands. +The token starts with the first non-blank character of the subexpression +and extends up to but not including the space, brace, close-paren, or bracket +that terminates the subexpression. +This token is followed by one or more additional tokens +that describe the subexpression. +If the first sub-token after the \fBTCL_TOKEN_SUB_EXPR\fR token +is a \fBTCL_TOKEN_OPERATOR\fR token, +the subexpression consists of an operator and its token operands. +If the operator has no operands, the subexpression consists of +just the \fBTCL_TOKEN_OPERATOR\fR token. +Each operand is described by a \fBTCL_TOKEN_SUB_EXPR\fR token. +Otherwise, the subexpression is a value described by +one of the token types \fBTCL_TOKEN_WORD\fR, \fBTCL_TOKEN_TEXT\fR, +\fBTCL_TOKEN_BS\fR, \fBTCL_TOKEN_COMMAND\fR, +\fBTCL_TOKEN_VARIABLE\fR, and \fBTCL_TOKEN_SUB_EXPR\fR. +The \fInumComponents\fR field +counts the total number of sub-tokens that make up the subexpression; +this includes the sub-tokens for any nested \fBTCL_TOKEN_SUB_EXPR\fR tokens. +.TP +\fBTCL_TOKEN_OPERATOR\fR +The token describes one operator of an expression +such as \fB&&\fR or \fBhypot\fR. +A \fBTCL_TOKEN_OPERATOR\fR token is always preceded by a +\fBTCL_TOKEN_SUB_EXPR\fR token +that describes the operator and its operands; +the \fBTCL_TOKEN_SUB_EXPR\fR token's \fInumComponents\fR field +can be used to determine the number of operands. +A binary operator such as \fB*\fR +is followed by two \fBTCL_TOKEN_SUB_EXPR\fR tokens +that describe its operands. +A unary operator like \fB-\fR +is followed by a single \fBTCL_TOKEN_SUB_EXPR\fR token +for its operand.
+If the operator is a math function such as \fBlog10\fR, +the \fBTCL_TOKEN_OPERATOR\fR token will give its name and +the following \fBTCL_TOKEN_SUB_EXPR\fR tokens will describe +its operands; +if there are no operands (as with \fBrand\fR), +no \fBTCL_TOKEN_SUB_EXPR\fR tokens follow. +There is one trinary operator, \fB?\fR, +that appears in if-then-else subexpressions +such as \fIx\fB?\fIy\fB:\fIz\fR; +in this case, the \fB?\fR \fBTCL_TOKEN_OPERATOR\fR token +is followed by three \fBTCL_TOKEN_SUB_EXPR\fR tokens for the operands +\fIx\fR, \fIy\fR, and \fIz\fR. +The \fInumComponents\fR field for a \fBTCL_TOKEN_OPERATOR\fR token +is always 0. +.PP +After \fBTcl_ParseCommand\fR returns, the first token pointed to by +the \fItokenPtr\fR field of the +Tcl_Parse structure always has type \fBTCL_TOKEN_WORD\fR or +\fBTCL_TOKEN_SIMPLE_WORD\fR. It is followed by the sub-tokens +that must be concatenated to produce the value of that word. +The next token is the \fBTCL_TOKEN_WORD\fR or \fBTCL_TOKEN_SIMPLE_WORD\fR +token for the second word, followed by sub-tokens for that +word, and so on until all \fInumWords\fR have been accounted +for. +.PP +After \fBTcl_ParseExpr\fR returns, the first token pointed to by +the \fItokenPtr\fR field of the +Tcl_Parse structure always has type \fBTCL_TOKEN_SUB_EXPR\fR. +It is followed by the sub-tokens that must be evaluated +to produce the value of the expression. +Only the token information in the Tcl_Parse structure +is modified: the \fIcommentStart\fR, \fIcommentSize\fR, +\fIcommandStart\fR, and \fIcommandSize\fR fields are not modified +by \fBTcl_ParseExpr\fR. +.PP +After \fBTcl_ParseBraces\fR returns, +the array of tokens pointed to by the \fItokenPtr\fR field of the +Tcl_Parse structure will contain a single \fBTCL_TOKEN_TEXT\fR token +if the braced string does not contain any backslash-newlines. 
+If the string does contain backslash-newlines, +the array of tokens will contain one or more +\fBTCL_TOKEN_TEXT\fR or \fBTCL_TOKEN_BS\fR sub-tokens +that must be concatenated to produce the value of the string. +If the braced string was just \fB{}\fR +(that is, the string was empty), +the single \fBTCL_TOKEN_TEXT\fR token will have a \fIsize\fR field +containing zero; +this ensures that at least one token appears +to describe the braced string. +Only the token information in the Tcl_Parse structure +is modified: the \fIcommentStart\fR, \fIcommentSize\fR, +\fIcommandStart\fR, and \fIcommandSize\fR fields are not modified +by \fBTcl_ParseBraces\fR. +.PP +After \fBTcl_ParseQuotedString\fR returns, +the array of tokens pointed to by the \fItokenPtr\fR field of the +Tcl_Parse structure depends on the contents of the quoted string. +It will consist of one or more \fBTCL_TOKEN_TEXT\fR, \fBTCL_TOKEN_BS\fR, +\fBTCL_TOKEN_COMMAND\fR, and \fBTCL_TOKEN_VARIABLE\fR sub-tokens. +The array always contains at least one token; +for example, if the argument \fIstring\fR is empty, +the array returned consists of a single \fBTCL_TOKEN_TEXT\fR token +with a zero \fIsize\fR field. +Only the token information in the Tcl_Parse structure +is modified: the \fIcommentStart\fR, \fIcommentSize\fR, +\fIcommandStart\fR, and \fIcommandSize\fR fields are not modified. +.PP +After \fBTcl_ParseVarName\fR returns, the first token pointed to by +the \fItokenPtr\fR field of the +Tcl_Parse structure always has type \fBTCL_TOKEN_VARIABLE\fR. It +is followed by the sub-tokens that make up the variable name as +described above. The total length of the variable name is +contained in the \fIsize\fR field of the first token. +As in \fBTcl_ParseExpr\fR, +only the token information in the Tcl_Parse structure +is modified by \fBTcl_ParseVarName\fR: +the \fIcommentStart\fR, \fIcommentSize\fR, +\fIcommandStart\fR, and \fIcommandSize\fR fields are not modified. 
+.PP +All of the character pointers in the +Tcl_Parse and Tcl_Token structures refer +to characters in the \fIstring\fR argument passed to +\fBTcl_ParseCommand\fR, \fBTcl_ParseExpr\fR, \fBTcl_ParseBraces\fR, +\fBTcl_ParseQuotedString\fR, and \fBTcl_ParseVarName\fR. +.PP +There are additional fields in the Tcl_Parse structure after the +\fInumTokens\fR field, but these are for the private use of +\fBTcl_ParseCommand\fR, \fBTcl_ParseExpr\fR, \fBTcl_ParseBraces\fR, +\fBTcl_ParseQuotedString\fR, and \fBTcl_ParseVarName\fR; they should not be +referenced by code outside of these procedures. + +.SH KEYWORDS +backslash substitution, braces, command, expression, parse, token, variable substitution diff --git a/doc/SaveResult.3 b/doc/SaveResult.3 new file mode 100644 index 0000000..cd123b8 --- /dev/null +++ b/doc/SaveResult.3 @@ -0,0 +1,65 @@ +'\" +'\" Copyright (c) 1997 by Sun Microsystems, Inc. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. +'\" +'\" SCCS: @(#) SaveResult.3 1.2 97/11/14 16:11:46 +'\" +.so man.macros +.TH Tcl_SaveResult 3 8.1 Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_SaveResult, Tcl_RestoreResult, Tcl_DiscardResult \- save and restore an interpreter's result +.SH SYNOPSIS +.nf +\fB#include <tcl.h>\fR +.sp +\fBTcl_SaveResult(\fIinterp, statePtr\fB)\fR +.sp +\fBTcl_RestoreResult(\fIinterp, statePtr\fB)\fR +.sp +\fBTcl_DiscardResult(\fIstatePtr\fB)\fR +.SH ARGUMENTS +.AS Tcl_SavedResult statePtr +.AP Tcl_Interp *interp in +Interpreter for which state should be saved. +.AP Tcl_SavedResult *statePtr in +Pointer to location where interpreter result should be saved or restored. +.BE + +.SH DESCRIPTION +.PP +These routines allow a C procedure to take a snapshot of the current +interpreter result so that it can be restored after a call +to \fBTcl_Eval\fR or some other routine that modifies the interpreter +result.
These routines are passed a pointer to a structure that is +used to store enough information to restore the interpreter result +state. This structure can be allocated on the stack of the calling +procedure. These routines do not save the state of any error +information in the interpreter (e.g. the \fBerrorCode\fR or +\fBerrorInfo\fR variables). +.PP +\fBTcl_SaveResult\fR moves the string and object results +of \fIinterp\fR into the location specified by \fIstatePtr\fR. +\fBTcl_SaveResult\fR clears the result for \fIinterp\fR and +leaves the result in its normal empty initialized state. +.PP +\fBTcl_RestoreResult\fR moves the string and object results from +\fIstatePtr\fR back into \fIinterp\fR. Any result or error that was +already in the interpreter will be cleared. The \fIstatePtr\fR is left +in an uninitialized state and cannot be used until another call to +\fBTcl_SaveResult\fR. +.PP +\fBTcl_DiscardResult\fR releases the saved interpreter state +stored at \fBstatePtr\fR. The state structure is left in an +uninitialized state and cannot be used until another call to +\fBTcl_SaveResult\fR. +.PP +Once \fBTcl_SaveResult\fR is called to save the interpreter +result, either \fBTcl_RestoreResult\fR or +\fBTcl_DiscardResult\fR must be called to properly clean up the +memory associated with the saved state. + +.SH KEYWORDS +result, state, interp diff --git a/doc/SetVar.3 b/doc/SetVar.3 index 32e7a4c..5f4de76 100644 --- a/doc/SetVar.3 +++ b/doc/SetVar.3 @@ -5,23 +5,33 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
'\" -'\" SCCS: @(#) SetVar.3 1.30 97/10/10 16:10:36 +'\" SCCS: @(#) SetVar.3 1.32 98/02/05 20:55:24 '\" .so man.macros -.TH Tcl_SetVar 3 7.4 Tcl "Tcl Library Procedures" +.TH Tcl_SetVar 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME -Tcl_SetVar, Tcl_SetVar2, Tcl_GetVar, Tcl_GetVar2, Tcl_UnsetVar, Tcl_UnsetVar2 \- manipulate Tcl variables +Tcl_SetObjVar2, Tcl_SetVar, Tcl_SetVar2, Tcl_GetObjVar2, Tcl_GetVar, Tcl_GetVar2, Tcl_UnsetVar, Tcl_UnsetVar2 \- manipulate Tcl variables .SH SYNOPSIS .nf \fB#include \fR .sp +.VS 8.1 +Tcl_Obj * +\fBTcl_SetObjVar2\fR(\fIinterp, name1, name2, newValuePtr, flags\fR) +.VE +.sp char * \fBTcl_SetVar\fR(\fIinterp, varName, newValue, flags\fR) .sp char * \fBTcl_SetVar2\fR(\fIinterp, name1, name2, newValue, flags\fR) .sp +.VS 8.1 +Tcl_Obj * +\fBTcl_GetObjVar2\fR(\fIinterp, name1, name2, flags\fR) +.VE +.sp char * \fBTcl_GetVar\fR(\fIinterp, varName, flags\fR) .sp @@ -34,64 +44,100 @@ int int \fBTcl_UnsetVar2\fR(\fIinterp, name1, name2, flags\fR) .SH ARGUMENTS -.AS Tcl_Interp *newValue +.AS Tcl_Interp *newValuePtr .AP Tcl_Interp *interp in Interpreter containing variable. +.AP char *name1 in +Contains the name of an array variable (if \fIname2\fR is non-NULL) +or (if \fIname2\fR is NULL) either the name of a scalar variable +or a complete name including both variable name and index. +May include \fB::\fR namespace qualifiers +to specify a variable in a particular namespace. +.AP char *name2 in +If non-NULL, gives name of element within array; in this +case \fIname1\fR must refer to an array variable. +.AP Tcl_Obj *newValuePtr in +.VS 8.1 +Points to a Tcl object containing the new value for the variable. +.VE +.AP int flags in +OR-ed combination of bits providing additional information. See below +for valid values. .AP char *varName in Name of variable. -May include a series of \fB::\fR namespace qualifiers +May include \fB::\fR namespace qualifiers to specify a variable in a particular namespace. 
May refer to a scalar variable or an element of -an array variable. -If the name references an element of an array, then it +an array. +If the name references an element of an array, then the name must be in writable memory: Tcl will make temporary modifications to it while looking up the name. .AP char *newValue in -New value for variable. -.AP int flags in -OR-ed combination of bits providing additional information for -operation. See below for valid values. -.AP char *name1 in -Name of scalar variable, or name of array variable if \fIname2\fR -is non-NULL. -May include a series of \fB::\fR namespace qualifiers -to specify a variable in a particular namespace. -.AP char *name2 in -If non-NULL, gives name of element within array and \fIname1\fR -must refer to an array variable. +New value for variable, specified as a NULL-terminated string. +A copy of this value is stored in the variable. .BE .SH DESCRIPTION .PP -These procedures may be used to create, modify, read, and delete +These procedures are used to create, modify, read, and delete Tcl variables from C code. .PP -Note that \fBTcl_GetVar\fR and \fBTcl_SetVar\fR -have been largely replaced by the -object-based procedures \fBTcl_ObjGetVar2\fR and \fBTcl_ObjSetVar2\fR. -Those object-based procedures read, modify, and create -a variable whose name is held in a Tcl object instead of a string. -They also return a pointer to the object -which is the variable's value instead of returning a string. -Operations on objects can be faster since objects -hold an internal representation that can be manipulated more efficiently. -.PP -\fBTcl_SetVar\fR and \fBTcl_SetVar2\fR +.VS 8.1 +\fBTcl_SetObjVar2\fR, \fBTcl_SetVar\fR, and \fBTcl_SetVar2\fR will create a new variable or modify an existing one. 
-Both of these procedures set the given variable to the value -given by \fInewValue\fR, and they return a pointer to a -copy of the variable's new value, which is stored in Tcl's +These procedures set the given variable to the value +given by \fInewValuePtr\fR or \fInewValue\fR and return a +pointer to the variable's new value, which is stored in Tcl's variable structure. -Tcl keeps a private copy of the variable's value, so the caller -may change \fInewValue\fR after these procedures return without -affecting the value of the variable. +\fBTcl_SetObjVar2\fR takes the new value as a Tcl_Obj and returns +a pointer to a Tcl_Obj. \fBTcl_SetVar\fR and \fBTcl_SetVar2\fR +take the new value as a string and return a string; they are +usually less efficient than \fBTcl_SetObjVar2\fR. Note that the +return value may be different than the \fInewValuePtr\fR or +.VE +\fInewValue\fR argument, due to modifications made by write traces. If an error occurs in setting the variable (e.g. an array -variable is referenced without giving an index into the array), -they return NULL. +variable is referenced without giving an index into the array), +NULL is returned and an error message is left in \fIinterp\fR's +result if the \fBTCL_LEAVE_ERR_MSG\fR \fIflag\fR bit is set. +.PP +.VS 8.1 +\fBTcl_GetObjVar2\fR, \fBTcl_GetVar\fR, and \fBTcl_GetVar2\fR +return the current value of a variable. +The arguments to these procedures are treated in the same way +as the arguments to the procedures above. +Under normal circumstances, the return value is a pointer +to the variable's value. For \fBTcl_GetObjVar2\fR the value is +returned as a pointer to a Tcl_Obj. For \fBTcl_GetVar\fR and +\fBTcl_GetVar2\fR the value is returned as a string; this is +usually less efficient, so \fBTcl_GetObjVar2\fR is preferred. +.VE +If an error occurs while reading the variable (e.g.
the variable +doesn't exist or an array element is specified for a scalar +variable), then NULL is returned and an error message is left +in \fIinterp\fR's result if the \fBTCL_LEAVE_ERR_MSG\fR \fIflag\fR +bit is set. .PP -The name of the variable may be specified to -\fBTcl_SetVar\fR and \fBTcl_SetVar2\fR in either of two ways. -If \fBTcl_SetVar\fR is called, the variable name is given as +\fBTcl_UnsetVar\fR and \fBTcl_UnsetVar2\fR may be used to remove +a variable, so that future attempts to read the variable will return +an error. +The arguments to these procedures are treated in the same way +as the arguments to the procedures above. +If the variable is successfully removed then TCL_OK is returned. +If the variable cannot be removed because it doesn't exist then +TCL_ERROR is returned and an error message is left +in \fIinterp\fR's result if the \fBTCL_LEAVE_ERR_MSG\fR \fIflag\fR +bit is set. +If an array element is specified, the given element is removed +but the array remains. +If an array name is specified without an index, then the entire +array is removed. +.PP +The name of a variable may be specified to these procedures in +three ways: +.IP [1] +If \fBTcl_SetVar\fR, \fBTcl_GetVar\fR, or \fBTcl_UnsetVar\fR +is invoked, the variable name is given as a single string, \fIvarName\fR. If \fIvarName\fR contains an open parenthesis and ends with a close parenthesis, then the value between the parentheses is @@ -100,22 +146,31 @@ the characters before the first open parenthesis are treated as the name of an array variable. If \fIvarName\fR doesn't have parentheses as described above, then the entire string is treated as the name of a scalar variable. -If \fBTcl_SetVar2\fR is called, then the array name and index -have been separated by the caller into two separate strings, -\fIname1\fR and \fIname2\fR respectively; if \fIname2\fR is -zero it means that a scalar variable is being referenced.
+.IP [2] +If the \fIname1\fR and \fIname2\fR arguments are provided and +\fIname2\fR is non-NULL, then an array element is specified and +the array name and index have +already been separated by the caller: \fIname1\fR contains the +name and \fIname2\fR contains the index. +.VS 8.1 +An error is generated +if \fIname1\fR contains an open parenthesis and ends with a +close parenthesis (array element) and \fIname2\fR is non-NULL. +.IP [3] +If \fIname2\fR is NULL, \fIname1\fR is treated just like +\fIvarName\fR in case [1] above (it can be either a scalar or an array +element variable name). +.VE .PP The \fIflags\fR argument may be used to specify any of several options to the procedures. It consists of an OR-ed combination of the following bits. -Note that the flag bit TCL_PARSE_PART1 is only meaningful -for the procedures Tcl_SetVar2 and Tcl_GetVar2. .TP \fBTCL_GLOBAL_ONLY\fR -Under normal circumstances the procedures look up variables as follows: +Under normal circumstances the procedures look up variables as follows. If a procedure call is active in \fIinterp\fR, -a variable is looked up at the current level of procedure call. -Otherwise, a variable is looked up first in the current namespace, +the variable is looked up at the current level of procedure call. +Otherwise, the variable is looked up first in the current namespace, then in the global namespace. However, if this bit is set in \fIflags\fR then the variable is looked up only in the global namespace @@ -124,14 +179,10 @@ If both \fBTCL_GLOBAL_ONLY\fR and \fBTCL_NAMESPACE_ONLY\fR are given, \fBTCL_GLOBAL_ONLY\fR is ignored. .TP \fBTCL_NAMESPACE_ONLY\fR -Under normal circumstances the procedures look up variables as follows: -If a procedure call is active in \fIinterp\fR, -a variable is looked up at the current level of procedure call. -Otherwise, a variable is looked up first in the current namespace, -then in the global namespace. 
-However, if this bit is set in \fIflags\fR then the variable -is looked up only in the current namespace -even if there is a procedure call active. +If this bit is set in \fIflags\fR then the variable +is looked up only in the current namespace; if a procedure is active +its variables are ignored, and the global namespace is also ignored unless +it is the current namespace. .TP \fBTCL_LEAVE_ERR_MSG\fR If an error is returned and this bit is set in \fIflags\fR, then @@ -142,9 +193,10 @@ If this flag bit isn't set then no error message is left and the interpreter's result will not be modified. .TP \fBTCL_APPEND_VALUE\fR -If this bit is set then \fInewValue\fR is appended to the current -value, instead of replacing it. -If the variable is currently undefined, then this bit is ignored. +If this bit is set then \fInewValuePtr\fR or \fInewValue\fR is +appended to the current value instead of replacing it. +If the variable is currently undefined, then the bit is ignored. +This bit is only used by the \fBTcl_Set*\fR procedures. .TP \fBTCL_LIST_ELEMENT\fR If this bit is set, then \fInewValue\fR is converted to a valid @@ -153,18 +205,6 @@ A separator space is appended before the new list element unless the list element is going to be the first element in a list or sublist (i.e. the variable's current value is empty, or contains the single character ``{'', or ends in `` }''). -.TP -\fBTCL_PARSE_PART1\fR -If this bit is set when calling \fITcl_SetVar2\fR and \fITcl_GetVar2\fR, -\fIname1\fR may contain both an array and an element name: -if the name contains an open parenthesis and ends with a -close parenthesis, then the value between the parentheses is -treated as an element name (which can have any string value) and -the characters before the first open -parenthesis are treated as the name of an array variable. -If the flag TCL_PARSE_PART1 is given, -\fIname2\fR should be NULL since the array and element names -are taken from \fIname1\fR. 
.PP \fBTcl_GetVar\fR and \fBTcl_GetVar2\fR return the current value of a variable. @@ -178,8 +218,6 @@ or \fBTcl_SetVar2\fR). and TCL_LEAVE_ERR_MSG, both of which have the same meaning as for \fBTcl_SetVar\fR. -In addition, \fBTcl_GetVar2\fR uses the bit TCL_PARSE_PART1, -which has the same meaning as for \fBTcl_SetVar2\fR. If an error occurs in reading the variable (e.g. the variable doesn't exist or an array element is specified for a scalar variable), then NULL is returned. @@ -198,7 +236,7 @@ If an array name is specified without an index, then the entire array is removed. .SH "SEE ALSO" -Tcl_GetObjResult, Tcl_GetStringResult, Tcl_ObjGetVar2, Tcl_ObjSetVar2, Tcl_TraceVar +Tcl_GetObjResult, Tcl_GetStringResult, Tcl_TraceVar .SH KEYWORDS -array, interpreter, object, scalar, set, unset, variable +array, get variable, interpreter, object, scalar, set, unset, variable diff --git a/doc/StringObj.3 b/doc/StringObj.3 index a98fc46..1f654c2 100644 --- a/doc/StringObj.3 +++ b/doc/StringObj.3 @@ -4,13 +4,13 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
'\" -'\" SCCS: @(#) @(#) StringObj.3 1.13 97/06/25 13:40:25 +'\" SCCS: @(#) @(#) StringObj.3 1.16 98/01/26 19:39:49 '\" .so man.macros -.TH Tcl_StringObj 3 8.0 Tcl "Tcl Library Procedures" +.TH Tcl_StringObj 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME -Tcl_NewStringObj, Tcl_SetStringObj, Tcl_GetStringFromObj, Tcl_AppendToObj, Tcl_AppendStringsToObj, Tcl_SetObjLength, TclConcatObj \- manipulate Tcl objects as strings +Tcl_NewStringObj, Tcl_SetStringObj, Tcl_GetStringFromObj, Tcl_GetString, Tcl_AppendToObj, Tcl_AppendStringsToObj, Tcl_AppendObjToObj, Tcl_SetObjLength, Tcl_ConcatObj \- manipulate Tcl objects as strings .SH SYNOPSIS .nf \fB#include \fR @@ -23,7 +23,14 @@ Tcl_Obj * char * \fBTcl_GetStringFromObj\fR(\fIobjPtr, lengthPtr\fR) .sp +char * +\fBTcl_GetString\fR(\fIobjPtr\fR) +.sp \fBTcl_AppendToObj\fR(\fIobjPtr, bytes, length\fR) +.VS +.sp +\fBTcl_AppendObjToObj\fR(\fIobjPtr, appendObjPtr\fR) +.VE .sp \fBTcl_AppendStringsToObj\fR(\fIobjPtr, string, string, ... \fB(char *) NULL\fR) .sp @@ -32,7 +39,7 @@ char * Tcl_Obj * \fBTcl_ConcatObj\fR(\fIobjc, objv\fR) .SH ARGUMENTS -.AS Tcl_Interp *lengthPtr out +.AS Tcl_Interp *appendObjPtr in/out .AP char *bytes in Points to the first byte of an array of bytes used to set or append to a string object. @@ -44,6 +51,10 @@ initializing, setting, or appending to a string object. If negative, all bytes up to the first null are used. .AP Tcl_Obj *objPtr in/out Points to an object to manipulate. +.VS +.AP Tcl_Obj *appendObjPtr in +The object to append to \fIobjPtr\fR in \fBTcl_AppendObjToObj\fR. +.VE .AP int *lengthPtr out If non-NULL, the location where \fBTcl_GetStringFromObj\fR will store the the length of an object's string representation. @@ -76,21 +87,30 @@ Both procedures set the object to hold a copy of the specified string. \fBTcl_SetStringObj\fR frees any old string representation as well as any old internal representation of the object. .PP -\fBTcl_GetStringFromObj\fR returns an object's string representation. 
+\fBTcl_GetStringFromObj\fR and \fBTcl_GetString\fR +return an object's string representation. This is given by the returned byte pointer -and length, which is stored in \fIlengthPtr\fR if it is non-NULL. +and (for \fBTcl_GetStringFromObj\fR) length, +which is stored in \fIlengthPtr\fR if it is non-NULL. If the object's string representation is invalid (its byte pointer is NULL), the string representation is regenerated from the object's internal representation. The storage referenced by the returned byte pointer is owned by the object manager and should not be modified by the caller. +The procedure \fBTcl_GetString\fR is used in the common case +where the caller does not need the length of the string representation. .PP \fBTcl_AppendToObj\fR appends the data given by \fIbytes\fR and \fIlength\fR to the object specified by \fIobjPtr\fR. It does this in a way that handles repeated calls relatively efficiently (it overallocates the string space to avoid repeated reallocations and copies of object's string value). +.VS +.PP +\fBTcl_AppendObjToObj\fR is similar to \fBTcl_AppendToObj\fR, but it +appends the string value of \fIappendObjPtr\fR to \fIobjPtr\fR. +.VE .PP \fBTcl_AppendStringsToObj\fR is similar to \fBTcl_AppendToObj\fR except that it can be passed more than one value to append and diff --git a/doc/Tcl.n b/doc/Tcl.n index 610fe1b..18e5b10 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -5,10 +5,10 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) Tcl.n 1.128 96/08/26 12:59:50 +'\" SCCS: @(#) Tcl.n 1.130 97/10/15 10:16:41 ' .so man.macros -.TH Tcl n "" Tcl "Tcl Built-In Commands" +.TH Tcl n "8.1" Tcl "Tcl Built-In Commands" .BS .SH NAME Tcl \- Summary of Tcl language syntax. @@ -111,47 +111,61 @@ special processing. The following table lists the backslash sequences that are handled specially, along with the value that replaces each sequence. 
.RS -.TP 6 +.TP 7 \e\fBa\fR Audible alert (bell) (0x7). -.TP 6 +.TP 7 \e\fBb\fR Backspace (0x8). -.TP 6 +.TP 7 \e\fBf\fR Form feed (0xc). -.TP 6 +.TP 7 \e\fBn\fR Newline (0xa). -.TP 6 +.TP 7 \e\fBr\fR Carriage-return (0xd). -.TP 6 +.TP 7 \e\fBt\fR Tab (0x9). -.TP 6 +.TP 7 \e\fBv\fR Vertical tab (0xb). -.TP 6 +.TP 7 \e\fB\fIwhiteSpace\fR -A single space character replaces the backslash, newline, and all -spaces and tabs after the newline. -This backslash sequence is unique in that it is replaced in a separate -pre-pass before the command is actually parsed. -This means that it will be replaced even when it occurs between -braces, and the resulting space will be treated as a word separator -if it isn't in braces or quotes. -.TP 6 +. +A single space character replaces the backslash, newline, and all spaces +and tabs after the newline. This backslash sequence is unique in that it +is replaced in a separate pre-pass before the command is actually parsed. +This means that it will be replaced even when it occurs between braces, +and the resulting space will be treated as a word separator if it isn't +in braces or quotes. +.TP 7 \e\e Backslash (``\e''). -.TP 6 -\e\fIooo\fR -The digits \fIooo\fR (one, two, or three of them) give the octal value of -the character. -.TP 6 -\e\fBx\fIhh\fR -The hexadecimal digits \fIhh\fR give the hexadecimal value of -the character. Any number of digits may be present. +.VS 8.1 br +.TP 7 +\e\fIooo\fR +. +The digits \fIooo\fR (one, two, or three of them) give an eight-bit octal +value for the Unicode character that will be inserted. The upper bits of the +Unicode character will be 0. +.TP 7 +\e\fBx\fIhh\fR +. +The hexadecimal digits \fIhh\fR give an eight-bit hexadecimal value for the +Unicode character that will be inserted. Any number of hexadecimal digits +may be present; however, all but the last two are ignored (the result is +always a one-byte quantity). The upper bits of the Unicode character will +be 0. +.TP 7 +\e\fBu\fIhhhh\fR +. 
+The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a +sixteen-bit hexadecimal value for the Unicode character that will be +inserted. +.VE .LP Backslash substitution is not performed on words enclosed in braces, except for backslash-newline as described above. diff --git a/doc/Thread.3 b/doc/Thread.3 new file mode 100644 index 0000000..99648a1 --- /dev/null +++ b/doc/Thread.3 @@ -0,0 +1,97 @@ +'\" +'\" Copyright (c) 1998 Sun Microsystems, Inc. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. +'\" +'\" SCCS: @(#) Thread.3 1.2 98/02/19 14:02:17 +'\" +.so man.macros +.TH Tcl_ConditionNotify 3 "" Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_ConditionNotify, Tcl_ConditionWait, Tcl_GetThreadData, Tcl_MutexLock, Tcl_MutexUnlock \- thread synchronization support. +.SH SYNOPSIS +.nf +\fB#include <tcl.h>\fR +.sp +void +\fBTcl_ConditionNotify\fR(\fIcondPtr\fR) +.sp +void +\fBTcl_ConditionWait\fR(\fIcondPtr, mutexPtr, timePtr\fR) +.sp +VOID * +\fBTcl_GetThreadData\fR(\fIkeyPtr, size\fR) +.sp +void +\fBTcl_MutexLock\fR(\fImutexPtr\fR) +.sp +void +\fBTcl_MutexUnlock\fR(\fImutexPtr\fR) +.SH ARGUMENTS +.AS Tcl_ThreadDataKey *keyPtr +.AP Tcl_Condition *condPtr in +A condition variable, which must be associated with a mutex lock. +.AP Tcl_Mutex *mutexPtr in +A mutex lock. +.AP Tcl_Time *timePtr in +A time limit on the condition wait. NULL to wait forever. +Note that a polling value of 0 seconds doesn't make much sense. +.AP Tcl_ThreadDataKey *keyPtr in +This identifies a block of thread local storage. The key should be +static and process-wide, yet each thread will end up associating +a different block of storage with this key. +.AP int size in +The size of the thread local storage block. This amount of data +is allocated and initialized to zero the first time each thread +calls \fBTcl_GetThreadData\fR.
+.BE + +.SH DESCRIPTION +.PP +A mutex is a lock that is used to serialize all threads through a piece +of code by calling \fBTcl_MutexLock\fR and \fBTcl_MutexUnlock\fR. +If one thread holds a mutex, any other thread calling \fBTcl_MutexLock\fR will +block until \fBTcl_MutexUnlock\fR is called. A thread can deadlock +on itself if it tries to lock the mutex twice. +The \fBTcl_MutexLock\fR and \fBTcl_MutexUnlock\fR +procedures are defined as empty macros if not compiling with threads enabled. +.PP +A condition variable is used as a signaling mechanism: +a thread can lock a mutex and then wait on a condition variable +with \fBTcl_ConditionWait\fR. This atomically releases the mutex lock +and blocks the waiting thread until another thread calls +\fBTcl_ConditionNotify\fR. The caller of \fBTcl_ConditionNotify\fR should +have the associated mutex held by previously calling \fBTcl_MutexLock\fR, +but this is not enforced. Notifying the +condition variable unblocks all threads waiting on the condition variable, +but they do not proceed until the mutex is released with \fBTcl_MutexUnlock\fR. +The implementation of \fBTcl_ConditionWait\fR automatically locks +the mutex before returning. +.PP +The caller of \fBTcl_ConditionWait\fR should be prepared for spurious +notifications by calling \fBTcl_ConditionWait\fR within a while loop +that tests some invariant. +.PP +The \fBTcl_GetThreadData\fR call returns a pointer to a block of +thread-private data. Its argument is a key that is shared by all threads +and a size for the block of storage. The storage is automatically +allocated and initialized to all zeros the first time each thread asks for it. +The storage is automatically deallocated by \fBTcl_FinalizeThread\fR. +.SH INITIALIZATION +.PP +.PP +All of these synchronization objects are self initializing. +They are implemented as opaque pointers that should be NULL +upon first use. +The mutexes and condition variables are +cleaned up by process exit handlers.
Thread local storage is +reclaimed during \fBTcl_FinalizeThread\fR. +.SH CREATING THREADS +The API to create threads is not finalized at this time. +There are private facilities to create threads that contain a new +Tcl interpreter, and to send scripts among threads. +Dive into tclThreadTest.c and tclThread.c for examples. +.SH KEYWORDS +thread, mutex, condition variable, thread local storage diff --git a/doc/ToUpper.3 b/doc/ToUpper.3 new file mode 100644 index 0000000..971eb5a --- /dev/null +++ b/doc/ToUpper.3 @@ -0,0 +1,90 @@ +'\" +'\" Copyright (c) 1997 by Sun Microsystems, Inc. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. +'\" +'\" SCCS: @(#) ToUpper.3 1.1 98/01/12 17:17:02 +'\" +.so man.macros +.TH Tcl_UtfToUpper 3 "8.1" Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings. +.SH SYNOPSIS +.nf +\fB#include \fR +.sp +Tcl_UniChar +\fBTcl_UniCharToUpper\fR(\fIch\fR) +.sp +Tcl_UniChar +\fBTcl_UniCharToLower\fR(\fIch\fR) +.sp +Tcl_UniChar +\fBTcl_UniCharToTitle\fR(\fIch\fR) +.sp +int +\fBTcl_UtfToUpper\fR(\fIstr\fR) +.sp +int +\fBTcl_UtfToLower\fR(\fIstr\fR) +.sp +int +\fBTcl_UtfToTitle\fR(\fIstr\fR) +.SH ARGUMENTS +.AS char *str in/out +.AP int ch in +The Tcl_UniChar to be converted. +.AP char *str in/out +Pointer to UTF-8 string to be converted in place. +.BE + +.SH DESCRIPTION +.PP +The first three routines convert the case of individual Unicode characters: +.PP +If \fIch\fR represents a lower-case character, +\fBTcl_UniCharToUpper\fR returns the corresponding upper-case +character. If no upper-case character is defined, it returns the +character unchanged. +.PP +If \fIch\fR represents an upper-case character, +\fBTcl_UniCharToLower\fR returns the corresponding lower-case +character. 
If no lower-case character is defined, it returns the +character unchanged. +.PP +If \fIch\fR represents a lower-case character, +\fBTcl_UniCharToTitle\fR returns the corresponding title-case +character. If no title-case character is defined, it returns the +corresponding upper-case character. If no upper-case character is +defined, it returns the character unchanged. Title-case is defined +for a small number of characters that have a different appearance when +they are at the beginning of a capitalized word. +.PP +The next three routines convert the case of UTF-8 strings in place in +memory: +.PP +\fBTcl_UtfToUpper\fR changes every UTF-8 character in \fIstr\fR to +upper-case. Because changing the case of a character may change its +size, the byte offset of each character in the resulting string may +differ from its original location. \fBTcl_UtfToUpper\fR writes a null +byte at the end of the converted string. \fBTcl_UtfToUpper\fR returns +the new length of the string in bytes. This new length is guaranteed +to be no longer than the original string length. +.PP +\fBTcl_UtfToLower\fR is the same as \fBTcl_UtfToUpper\fR except it +turns each character in the string into its lower-case equivalent. +.PP +\fBTcl_UtfToTitle\fR is the same as \fBTcl_UtfToUpper\fR except it +turns the first character in the string into its title-case equivalent +and all following characters into their lower-case equivalents. + +.SH BUGS +.PP +At this time, the case conversions are only defined for the ISO8859-1 +characters. Unicode characters above 0x00ff are not modified by these +routines. + +.SH KEYWORDS +utf, unicode, toupper, tolower, totitle, case diff --git a/doc/TraceVar.3 b/doc/TraceVar.3 index 976be4f..c0d7c3a 100644 --- a/doc/TraceVar.3 +++ b/doc/TraceVar.3 @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
'\" -'\" SCCS: @(#) TraceVar.3 1.27 97/10/10 15:05:37 +'\" SCCS: @(#) TraceVar.3 1.29 98/02/19 13:20:48 '\" .so man.macros .TH Tcl_TraceVar 3 7.4 Tcl "Tcl Library Procedures" @@ -44,7 +44,7 @@ must be in writable memory: Tcl will make temporary modifications to it while looking up the name. .AP int flags in OR-ed combination of the values TCL_TRACE_READS, TCL_TRACE_WRITES, and -TCL_TRACE_UNSETS, TCL_PARSE_PART1, and TCL_GLOBAL_ONLY. +TCL_TRACE_UNSETS, TCL_TRACE_ARRAY, and TCL_GLOBAL_ONLY. Not all flags are used by all procedures. See below for more information. .AP Tcl_VarTraceProc *proc in @@ -96,6 +96,12 @@ A variable may be unset either explicitly by an \fBunset\fR command, or implicitly when a procedure returns (its local variables are automatically unset) or when the interpreter is deleted (all variables are automatically unset). +.TP +\fBTCL_TRACE_ARRAY\fR +Invoke \fIproc\fR whenever the array command is invoked. +This gives the trace procedure a chance to update the array before +array names or array get is called. Note that this is called +before an array set, but that will trigger write traces. .PP Whenever one of the specified operations occurs on the variable, \fIproc\fR will be invoked. @@ -120,7 +126,8 @@ in the normal two-part form (see the description of \fBTcl_TraceVar2\fR below for details). \fIFlags\fR is an OR-ed combination of bits providing several pieces of information. -One of the bits TCL_TRACE_READS, TCL_TRACE_WRITES, or TCL_TRACE_UNSETS +One of the bits TCL_TRACE_READS, TCL_TRACE_WRITES, TCL_TRACE_ARRAY, +or TCL_TRACE_UNSETS will be set in \fIflags\fR to indicate which operation is being performed on the variable. The bit TCL_GLOBAL_ONLY will be set whenever the variable being @@ -175,24 +182,26 @@ The procedures \fBTcl_TraceVar2\fR, \fBTcl_UntraceVar2\fR, and except that the name of the variable consists of two parts. \fIName1\fR gives the name of a scalar variable or array, and \fIname2\fR gives the name of an element within an array. 
-If \fIname2\fR is NULL it means that either the variable is -a scalar or the trace is to be set on the entire array rather -than an individual element (see WHOLE-ARRAY TRACES below for -more information). -As a special case, if the flag TCL_PARSE_PART1 is specified, +.VS 8.1 +When \fIname2\fR is NULL, \fIname1\fR may contain both an array and an element name: if the name contains an open parenthesis and ends with a close parenthesis, then the value between the parentheses is treated as an element name (which can have any string value) and the characters before the first open parenthesis are treated as the name of an array variable. -If the flag TCL_PARSE_PART1 is given, -\fIname2\fR should be NULL since the array and element names -are taken from \fIname1\fR. +If \fIname2\fR is NULL and \fIname1\fR does not refer +to an array element +.VE +it means that either the variable is +a scalar or the trace is to be set on the entire array rather +than an individual element (see WHOLE-ARRAY TRACES below for +more information). + .SH "ACCESSING VARIABLES DURING TRACES" .PP -During read and write traces, the +During read, write, and array traces, the trace procedure can read, write, or unset the traced variable using \fBTcl_GetVar2\fR, \fBTcl_SetVar2\fR, and other procedures. @@ -245,6 +254,12 @@ access. If it deletes the variable then the traced access will return an empty string. .PP +When array tracing has been specified, the trace procedure +will be invoked at the beginning of the array command implementation, +before any of the operations like get, set, or names have been invoked. +The trace procedure can modify the array elements with \fBTcl_SetVar\fR +and \fBTcl_SetVar2\fR. +.PP When unset tracing has been specified, the trace procedure will be invoked whenever the variable is destroyed. The traces will be called after the variable has been @@ -343,6 +358,9 @@ to clean up and free their own internal data structures. 
Tcl doesn't do any error checking to prevent trace procedures from misusing the interpreter during traces with TCL_INTERP_DESTROYED set. +.PP +Array traces are not yet integrated with the Tcl "info exists" command, +nor is there Tcl-level access to array traces. .SH KEYWORDS clientData, trace, variable diff --git a/doc/Translate.3 b/doc/Translate.3 index 6330ee9..0b7b918 100644 --- a/doc/Translate.3 +++ b/doc/Translate.3 @@ -1,14 +1,14 @@ '\" '\" Copyright (c) 1989-1993 The Regents of the University of California. -'\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 1994-1998 Sun Microsystems, Inc. '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) Translate.3 1.22 96/08/26 12:59:51 +'\" SCCS: @(#) Translate.3 1.24 98/01/16 23:58:06 '\" .so man.macros -.TH Tcl_TranslateFileName 3 7.5 Tcl "Tcl Library Procedures" +.TH Tcl_TranslateFileName 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME Tcl_TranslateFileName \- convert file name to native form and replace tilde with home directory @@ -26,7 +26,7 @@ Interpreter in which to report an error, if any. File name, which may start with a ``~''. .AP Tcl_DString *bufferPtr in/out If needed, this dynamic string is used to store the new file name. -At the time of the call it should be uninitialized or empty. The +At the time of the call it should be uninitialized or free. The caller must eventually call \fBTcl_DStringFree\fR to free up anything stored here. .BE diff --git a/doc/Utf.3 b/doc/Utf.3 new file mode 100644 index 0000000..062efa2 --- /dev/null +++ b/doc/Utf.3 @@ -0,0 +1,160 @@ +'\" +'\" Copyright (c) 1997 Sun Microsystems, Inc. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
+'\" +'\" SCCS: @(#) Utf.3 1.4 98/01/19 14:35:31 +'\" +.so man.macros +.TH Utf 3 "8.1" Tcl "Tcl Library Procedures" +.BS +.SH NAME +Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings. +.SH SYNOPSIS +.nf +\fB#include <tcl.h>\fR +.sp +typedef ... Tcl_UniChar; +.sp +int +\fBTcl_UniCharToUtf\fR(\fIch, buf\fR) +.sp +int +\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR) +.sp +int +\fBTcl_UtfCharComplete\fR(\fIsrc, len\fR) +.sp +int +\fBTcl_NumUtfChars\fR(\fIsrc, len\fR) +.sp +char * +\fBTcl_UtfFindFirst\fR(\fIsrc, ch\fR) +.sp +char * +\fBTcl_UtfFindLast\fR(\fIsrc, ch\fR) +.sp +char * +\fBTcl_UtfNext\fR(\fIsrc\fR) +.sp +char * +\fBTcl_UtfPrev\fR(\fIsrc, start\fR) +.sp +Tcl_UniChar +\fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR) +.sp +char * +\fBTcl_UtfAtIndex\fR(\fIsrc, index\fR) +.sp +int +\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR) +.SH ARGUMENTS +.AS "CONST char" *chPtr out +.AP char *buf out +Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most +TCL_UTF_MAX bytes are stored in the buffer. +.AP int ch in +The Tcl_UniChar to be converted or examined. +.AP Tcl_UniChar *chPtr out +Filled with the Tcl_UniChar represented by the head of the UTF-8 string. +.AP "CONST char" *src in +Pointer to a UTF-8 string. +.AP int len in +The length of the UTF-8 string in bytes (not UTF-8 characters). If +negative, all bytes up to the first null byte are used. +.AP "CONST char" *start in +Pointer to the beginning of a UTF-8 string. +.AP int index in +The index of a character (not byte) in the UTF-8 string. +.AP int *readPtr out +If non-NULL, filled with the number of bytes in the backslash sequence, +including the backslash character. +.AP char *dst out +Buffer in which the bytes represented by the backslash sequence are stored. +At most TCL_UTF_MAX bytes are stored in the buffer. 
+.BE + +.SH DESCRIPTION +.PP +These routines convert between UTF-8 strings and Tcl_UniChars. A +Tcl_UniChar is a Unicode character represented as an unsigned, fixed-size +quantity. A UTF-8 character is a Unicode character represented as +a varying-length sequence of up to TCL_UTF_MAX bytes. A multibyte UTF-8 +sequence consists of a lead byte followed by some number of trail bytes. +.PP +\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to +represent one Unicode character in the UTF-8 representation. +.PP +\fBTcl_UniCharToUtf\fR stores the Tcl_UniChar \fIch\fR as a UTF-8 string +starting at \fIbuf\fR. The return value is the number of bytes stored +in \fIbuf\fR. +.PP +\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR +and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the +number of bytes read from \fIsrc\fR. The caller must ensure that the +source buffer is long enough such that this routine does not run off the +end and dereference non-existent or random memory; if the source buffer +is known to be null terminated, this will not happen. If the input is +not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first +byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and +0x00ff and return 1. +.PP +\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR +of length \fIlen\fR bytes is long enough to be decoded by +\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee +that the UTF-8 string is properly formed. This routine is used by +procedures that are operating on a byte at a time and need to know if a +full Tcl_UniChar has been seen. +.PP +\fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It +returns the number of Tcl_UniChars that are represented by the UTF-8 string +\fIsrc\fR. The length of the source string is \fIlen\fR bytes. If the +length is negative, all bytes up to the first NULL byte are used. 
+.PP +\fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It +returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR +in the NULL-terminated UTF-8 string \fIsrc\fR. The NULL terminator is +considered part of the UTF-8 string. +.PP +\fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings. It +returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR +in the NULL-terminated UTF-8 string \fIsrc\fR. The NULL terminator is +considered part of the UTF-8 string. +.PP +Given \fIsrc\fR, a pointer to some location in a UTF-8 string, +\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the +string. The caller must not ask for the next character after the last +character in the string. +.PP +Given \fIsrc\fR, a pointer to some location in a UTF-8 string, +\fBTcl_UtfPrev\fR returns a pointer to the previous UTF-8 character in the +string. This function will not back up to a position before \fIstart\fR, +the start of the UTF-8 string. If \fIsrc\fR was already at \fIstart\fR, the +return value will be \fIstart\fR. +.PP +\fBTcl_UniCharAtIndex\fR corresponds to a C string array dereference or the +Pascal Ord() function. It returns the Tcl_UniChar represented at the +specified character (not byte) \fIindex\fR in the UTF-8 string +\fIsrc\fR. The source string must contain at least \fIindex\fR +characters. +.PP +\fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not +byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must +contain at least \fIindex\fR characters. This is equivalent to calling +\fBTcl_UtfNext\fR \fIindex\fR times. +.PP +\fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl +commands. It parses a backslash sequence and stores the properly formed +UTF-8 character represented by the backslash sequence in the output +buffer \fIdst\fR. At most TCL_UTF_MAX bytes are stored in the buffer. 
+\fBTcl_UtfBackslash\fR modifies \fI*readPtr\fR to contain the number +of bytes in the backslash sequence, including the backslash character. +The return value is the number of bytes stored in the output buffer. +.PP +See the \fBTcl\fR manual entry for information on the valid backslash +sequences. All of the sequences described in the Tcl manual entry are +supported by \fBTcl_UtfBackslash\fR. + +.SH KEYWORDS +utf, unicode, backslash diff --git a/doc/binary.n b/doc/binary.n index 067c52e..2833eed 100644 --- a/doc/binary.n +++ b/doc/binary.n @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) binary.n 1.7 97/11/11 19:08:47 +'\" SCCS: @(#) binary.n 1.8 97/11/20 12:35:30 '\" .so man.macros .TH binary n 8.0 Tcl "Tcl Built-In Commands" diff --git a/doc/exec.n b/doc/exec.n index 22caf80..4dede37 100644 --- a/doc/exec.n +++ b/doc/exec.n @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) exec.n 1.17 96/09/18 15:21:17 +'\" SCCS: @(#) exec.n 1.18 96/11/25 14:18:47 '\" .so man.macros .TH exec n 7.6 Tcl "Tcl Built-In Commands" @@ -202,10 +202,11 @@ instead of ``applbakery.default''). Two or more forward or backward slashes in a row in a path refer to a network path. For example, a simple concatenation of the root directory \fBc:/\fR with a subdirectory \fB/windows/system\fR will yield -\fBc://windows/system\fR (two slashes together), which refers to the -directory \fB/system\fR on the machine \fBwindows\fR (and the \fBc:/\fR is -ignored), and is not equivalent to \fBc:/windows/system\fR, which describes -a directory on the current computer. 
+\fBc://windows/system\fR (two slashes together), which refers to the mount +point called \fBsystem\fR on the machine called \fBwindows\fR (and the +\fBc:/\fR is ignored), and is not equivalent to \fBc:/windows/system\fR, +which describes a directory on the current computer. The \fBfile join\fR +command should be used to concatenate path components. .TP \fBWindows NT\fR . @@ -264,7 +265,7 @@ the caller must prepend ``\fBcommand.com /c\0\fR'' to the desired command. Once a 16-bit DOS application has read standard input from a console and then quit, all subsequently run 16-bit DOS applications will see the standard input as already closed. 32-bit applications do not have this -problem and will run correctly even after a 16-bit DOS application thinks +problem and will run correctly, even after a 16-bit DOS application thinks that standard input is closed. There is no known workaround for this bug at this time. .sp @@ -282,8 +283,8 @@ other end of the pipe must be closed before the 16-bit DOS application begins executing. All standard output or error from a 16-bit DOS application to a pipe is collected into temporary files; the application must terminate before the temporary files are redirected to the next stage -of the pipeline. This is due to a workaround for a Windows 95 bug in the -implementation of pipes, and is how the Windows 95 command line interpreter +of the pipeline. This is due to a workaround for a Windows 95 bug in the +implementation of pipes, and is how the standard Windows 95 DOS shell handles pipes itself. .sp Certain applications, such as \fBcommand.com\fR, should not be executed diff --git a/doc/fconfigure.n b/doc/fconfigure.n index 1c187ac..414efb1 100644 --- a/doc/fconfigure.n +++ b/doc/fconfigure.n @@ -4,10 +4,10 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
'\" -'\" SCCS: @(#) fconfigure.n 1.23 96/04/16 08:20:07 +'\" SCCS: @(#) fconfigure.n 1.25 97/10/20 15:22:10 '\" .so man.macros -.TH fconfigure n 7.5 Tcl "Tcl Built-In Commands" +.TH fconfigure n 8.1 Tcl "Tcl Built-In Commands" .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME @@ -51,122 +51,143 @@ using the Tcl event loop (e.g. by calling \fBTcl_DoOneEvent\fR or invoking the \fBvwait\fR command). .TP \fB\-buffering\fR \fInewValue\fR +. If \fInewValue\fR is \fBfull\fR then the I/O system will buffer output until its internal buffer is full or until the \fBflush\fR command is invoked. If \fInewValue\fR is \fBline\fR, then the I/O system will automatically flush output for the channel whenever a newline character is output. If \fInewValue\fR is \fBnone\fR, the I/O system will flush -automatically after every output operation. -The default is for \fB\-buffering\fR to be set to \fBfull\fR except for -channels that connect to terminal-like devices; for these channels the -initial setting is \fBline\fR. +automatically after every output operation. The default is for +\fB\-buffering\fR to be set to \fBfull\fR except for channels that +connect to terminal-like devices; for these channels the initial setting +is \fBline\fR. .TP \fB\-buffersize\fR \fInewSize\fR +. \fINewvalue\fR must be an integer; its value is used to set the size of buffers, in bytes, subsequently allocated for this channel to store input or output. \fINewvalue\fR must be between ten and one million, allowing buffers of ten to one million bytes in size. +.VS 8.1 br +.TP +\fB\-encoding\fR \fIname\fR +. +This option is used to specify the encoding of the channel, so that the data +can be converted to and from Unicode for use in Tcl. For instance, in +order for Tcl to read characters from a Japanese file in \fBshiftjis\fR +and properly process and display the contents, the encoding would be set +to \fBshiftjis\fR. 
Thereafter, when reading from the channel, the bytes in +the Japanese file would be converted to Unicode as they are read. +Writing is also supported \- as Tcl strings are written to the channel they +will automatically be converted to the specified encoding on output. +.RS +.PP +If a file contains pure binary data (for instance, a JPEG image), the +encoding for the channel should be configured to be \fBbinary\fR. Tcl +will then assign no interpretation to the data in the file and simply read or +write raw bytes. The Tcl \fBbinary\fR command can be used to manipulate this +byte-oriented data. +.PP +The default encoding for newly opened channels is the same platform- and +locale-dependent system encoding used for interfacing with the operating +system. +.RE +.VE .TP \fB\-eofchar\fR \fIchar\fR .TP \fB\-eofchar\fR \fB{\fIinChar outChar\fB}\fR -This option supports DOS file systems that use Control-z (\ex1a) as -an end of file marker. -If \fIchar\fR is not an empty string, then this character signals -end of file when it is encountered during input. -For output, the end of file character is output when -the channel is closed. -If \fIchar\fR is the empty string, then there is no special -end of file character marker. -For read-write channels, a two-element list specifies -the end of file marker for input and output, respectively. -As a convenience, when setting the end-of-file character -for a read-write channel -you can specify a single value that will apply to both reading and writing. -When querying the end-of-file character of a read-write channel, -a two-element list will always be returned. -The default value for \fB\-eofchar\fR is the empty string in all -cases except for files under Windows. In that case the \fB\-eofchar\fR -is Control-z (\ex1a) for reading and the empty string for writing. +. +This option supports DOS file systems that use Control-z (\ex1a) as an +end of file marker. 
If \fIchar\fR is not an empty string, then this +character signals end-of-file when it is encountered during input. For +output, the end-of-file character is output when the channel is closed. +If \fIchar\fR is the empty string, then there is no special end of file +character marker. For read-write channels, a two-element list specifies +the end of file marker for input and output, respectively. As a +convenience, when setting the end-of-file character for a read-write +channel you can specify a single value that will apply to both reading +and writing. When querying the end-of-file character of a read-write +channel, a two-element list will always be returned. The default value +for \fB\-eofchar\fR is the empty string in all cases except for files +under Windows. In that case the \fB\-eofchar\fR is Control-z (\ex1a) for +reading and the empty string for writing. .TP \fB\-translation\fR \fImode\fR .TP -\fB\-translation\fR \fB{\fIinMode outMode\fB}\fR -In Tcl scripts the end of a line is always represented using a -single newline character (\en). -However, in actual files and devices the end of a line may be -represented differently on different platforms, or even for -different devices on the same platform. For example, under UNIX -newlines are used in files, whereas carriage-return-linefeed -sequences are normally used in network connections. -On input (i.e., with \fBgets\fP and \fBread\fP) -the Tcl I/O system automatically translates the external end-of-line -representation into newline characters. -Upon output (i.e., with \fBputs\fP), -the I/O system translates newlines to the external -end-of-line representation. -The default translation mode, \fBauto\fP, handles all the common -cases automatically, but the \fB\-translation\fR option provides -explicit control over the end of line translations. +\fB\-translation\fR \fB{\fIinMode outMode\fB}\fR +. +In Tcl scripts the end of a line is always represented using a single +newline character (\en). 
However, in actual files and devices the end of +a line may be represented differently on different platforms, or even for +different devices on the same platform. For example, under UNIX newlines +are used in files, whereas carriage-return-linefeed sequences are +normally used in network connections. On input (i.e., with \fBgets\fP +and \fBread\fP) the Tcl I/O system automatically translates the external +end-of-line representation into newline characters. Upon output (i.e., +with \fBputs\fP), the I/O system translates newlines to the external +end-of-line representation. The default translation mode, \fBauto\fP, +handles all the common cases automatically, but the \fB\-translation\fR +option provides explicit control over the end of line translations. .RS .PP The value associated with \fB\-translation\fR is a single item for -read-only and write-only channels. -The value is a two-element list for read-write channels; -the read translation mode is the first element of the list, -and the write translation mode is the second element. -As a convenience, when setting the translation mode for a read-write channel -you can specify a single value that will apply to both reading and writing. -When querying the translation mode of a read-write channel, -a two-element list will always be returned. -The following values are currently supported: +read-only and write-only channels. The value is a two-element list for +read-write channels; the read translation mode is the first element of +the list, and the write translation mode is the second element. As a +convenience, when setting the translation mode for a read-write channel +you can specify a single value that will apply to both reading and +writing. When querying the translation mode of a read-write channel, a +two-element list will always be returned. 
The following values are +currently supported: .TP \fBauto\fR -As the input translation mode, \fBauto\fR treats any of newline (\fBlf\fP), -carriage return (\fBcr\fP), or carriage return followed by a newline (\fBcrlf\fP) -as the end of line representation. The end of line representation can -even change from line-to-line, and all cases are translated to a newline. -As the output translation mode, \fBauto\fR chooses a platform specific -representation; for sockets on all platforms Tcl -chooses \fBcrlf\fR, for all Unix flavors, it chooses \fBlf\fR, for the +. +As the input translation mode, \fBauto\fR treats any of newline +(\fBlf\fP), carriage return (\fBcr\fP), or carriage return followed by a +newline (\fBcrlf\fP) as the end of line representation. The end of line +representation can even change from line-to-line, and all cases are +translated to a newline. As the output translation mode, \fBauto\fR +chooses a platform specific representation; for sockets on all platforms +Tcl chooses \fBcrlf\fR, for all Unix flavors, it chooses \fBlf\fR, for the Macintosh platform it chooses \fBcr\fR and for the various flavors of -Windows it chooses \fBcrlf\fR. -The default setting for \fB\-translation\fR is \fBauto\fR for both -input and output. +Windows it chooses \fBcrlf\fR. The default setting for +\fB\-translation\fR is \fBauto\fR for both input and output. +.VS 8.1 br .TP -\fBbinary\fR +\fBbinary\fR +. No end-of-line translations are performed. This is nearly identical to \fBlf\fP mode, except that in addition \fBbinary\fP mode also sets the -end of file character to the empty string, which disables it. -See the description of -\fB\-eofchar\fP for more information. +end-of-file character to the empty string (which disables it) and sets the +encoding to \fBbinary\fR (which disables encoding filtering). See the +description of \fB\-eofchar\fR and \fB\-encoding\fR for more information. 
+.VE .TP \fBcr\fR -The end of a line in the underlying file or device is represented -by a single carriage return character. -As the input translation mode, \fBcr\fP mode converts carriage returns -to newline characters. -As the output translation mode, \fBcr\fP mode -translates newline characters to carriage returns. -This mode is typically used on Macintosh platforms. +. +The end of a line in the underlying file or device is represented by a +single carriage return character. As the input translation mode, +\fBcr\fP mode converts carriage returns to newline characters. As the +output translation mode, \fBcr\fP mode translates newline characters to +carriage returns. This mode is typically used on Macintosh platforms. .TP \fBcrlf\fR -The end of a line in the underlying file or device is represented -by a carriage return character followed by a linefeed character. -As the input translation mode, \fBcrlf\fP mode converts -carriage-return-linefeed sequences -to newline characters. -As the output translation mode, \fBcrlf\fP mode -translates newline characters to -carriage-return-linefeed sequences. -This mode is typically used on Windows platforms and for network -connections. +. +The end of a line in the underlying file or device is represented by a +carriage return character followed by a linefeed character. As the input +translation mode, \fBcrlf\fP mode converts carriage-return-linefeed +sequences to newline characters. As the output translation mode, +\fBcrlf\fP mode translates newline characters to carriage-return-linefeed +sequences. This mode is typically used on Windows platforms and for +network connections. .TP \fBlf\fR -The end of a line in the underlying file or device is represented -by a single newline (linefeed) character. -In this mode no translations occur during either input or output. -This mode is typically used on UNIX platforms. +. +The end of a line in the underlying file or device is represented by a +single newline (linefeed) character. 
In this mode no translations occur +during either input or output. This mode is typically used on UNIX +platforms. .RE .PP @@ -175,4 +196,5 @@ close(n), flush(n), gets(n), puts(n), read(n), socket(n) .SH KEYWORDS blocking, buffering, carriage return, end of line, flushing, linemode, -newline, nonblocking, platform, translation +newline, nonblocking, platform, translation, encoding, filter, byte array, +binary diff --git a/doc/glob.n b/doc/glob.n index 2097534..5e7effd 100644 --- a/doc/glob.n +++ b/doc/glob.n @@ -5,10 +5,10 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) glob.n 1.11 96/08/26 12:59:59 +'\" SCCS: @(#) glob.n 1.12 97/10/17 16:52:29 '\" .so man.macros -.TH glob n 7.5 Tcl "Tcl Built-In Commands" +.TH glob n 8.1 Tcl "Tcl Built-In Commands" .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME @@ -76,9 +76,18 @@ contains a ?, *, or [] construct. Unlike other Tcl commands that will accept both network and native style names (see the \fBfilename\fR manual entry for details on how native and network names are specified), the \fBglob\fR command only -accepts native names. Also, for Windows UNC names, the servername and -sharename components of the path may not contain ?, *, or [] -constructs. +accepts native names. +.VS 8.1 +.TP +\fBWindows\fR +. +For Windows UNC names, the servername and sharename components of the path +may not contain ?, *, or [] constructs. On Windows NT, if \fIpattern\fR is +of the form ``\fB~\fIusername\fB@\fIdomain\fR'' it refers to the home +directory of the user whose account information resides on the specified NT +domain server. Otherwise, user account information is obtained from +the local computer. 
+.VE .SH KEYWORDS exist, file, glob, pattern diff --git a/doc/man.macros b/doc/man.macros index 3af2da9..a0db675 100644 --- a/doc/man.macros +++ b/doc/man.macros @@ -59,7 +59,7 @@ '\" .UL arg1 arg2 '\" Print arg1 underlined, then print arg2 normally. '\" -'\" SCCS: @(#) man.macros 1.9 97/08/22 18:50:59 +'\" SCCS: @(#) man.macros 1.10 97/09/02 16:37:14 '\" '\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. .if t .wh -1.3i ^B @@ -72,8 +72,8 @@ . ie !"\\$2"" .TP \\n()Cu . el .TP 15 .\} -.ie !"\\$3"" \{\ .ta \\n()Au \\n()Bu +.ie !"\\$3"" \{\ \&\\$1 \\fI\\$2\\fP (\\$3) .\".b .\} diff --git a/doc/pkgMkIndex.n b/doc/pkgMkIndex.n index 702c657..23cd063 100644 --- a/doc/pkgMkIndex.n +++ b/doc/pkgMkIndex.n @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) pkgMkIndex.n 1.8 97/10/31 12:51:13 +'\" SCCS: @(#) pkgMkIndex.n 1.10 98/01/28 17:23:37 '\" .so man.macros .TH pkg_mkIndex n 7.6 Tcl "Tcl Built-In Commands" @@ -14,7 +14,9 @@ pkg_mkIndex \- Build an index for automatic loading of packages .SH SYNOPSIS .nf -\fBpkg_mkIndex \fIdir\fR \fIpattern \fR?\fIpattern pattern ...\fR? +.VS 8.1 +\fBpkg_mkIndex ?\fI-direct\fR? \fIdir\fR ?\fIpattern pattern ...\fR? +.VE .fi .BE @@ -39,6 +41,12 @@ Create the index by invoking \fBpkg_mkIndex\fR. The \fIdir\fR argument gives the name of a directory and each \fIpattern\fR argument is a \fBglob\fR-style pattern that selects script or binary files in \fIdir\fR. +.VS 8.1 +The default pattern is \fB*.tcl\fR and \fB*.[info sharedlibextension]\fR. +If the optional \fI-direct\fR argument is given, the generated index +will manage to load the package immediately upon \fBpackage require\fR +instead of delaying loading until actual use of one of the commands. +.VE \fBPkg_mkIndex\fR will create a file \fBpkgIndex.tcl\fR in \fIdir\fR with package information about all the files given by the \fIpattern\fR arguments. 
@@ -46,7 +54,6 @@ It does this by loading each file and seeing what packages and new commands appear (this is why it is essential to have \fBpackage provide\fR commands or \fBTcl_PkgProvide\fR calls in the files, as described above). -.VS "" br .IP [3] Install the package as a subdirectory of one of the directories given by the \fBtcl_pkgPath\fR variable. If \fB$tcl_pkgPath\fR contains more @@ -63,14 +70,13 @@ directory in \fB$tcl_pkgPath\fR it will automatically be found during .RS .LP If you install the package anywhere else, then you must ensure that -the directory contaiingn the package is in the \fBauto_path\fR global variable +the directory containing the package is in the \fBauto_path\fR global variable or an immediate subdirectory of one of the directories in \fBauto_path\fR. \fBAuto_path\fR contains a list of directories that are searched by both the auto-loader and the package loader; by default it includes \fB$tcl_pkgPath\fR. The package loader also checks all of the subdirectories of the directories in \fBauto_path\fR. -.VE You can add a directory to \fBauto_path\fR explicitly in your application, or you can add the directory to your \fBTCLLIBPATH\fR environment variable: if this environment variable is present, @@ -124,12 +130,31 @@ commands for each version of each available package; these commands invoke \fBpackage provide\fR commands to announce the availability of the package, and they setup auto-loader information to load the files of the package. -A given file of a given version of a given package isn't +.VS +Unless the \fI-direct\fR flag was provided when the \fBpkgIndex.tcl\fR +was generated, +.VE +a given file of a given version of a given package isn't actually loaded until the first time one of its commands is invoked. 
-Thus, after invoking \fBpackage require\fR you won't see +Thus, after invoking \fBpackage require\fR you +.VS 8.1 +may +.VE +not see the package's commands in the interpreter, but you will be able to invoke the commands and they will be auto-loaded. +.VS 8.1 +.SH "DIRECT LOADING" +.PP +Some packages, for instance packages which use namespaces and export +commands or those which require special initialization, might select +that their package files be loaded immediately upon \fBpackage require\fR +instead of delaying the actual loading to the first use of one of the +package's commands. This mode is enabled when generating the package +index by specifying the \fI-direct\fR argument. +.VE + .SH KEYWORDS auto-load, index, package, version diff --git a/doc/regexp.n b/doc/regexp.n index f3951ee..ea2e0d5 100644 --- a/doc/regexp.n +++ b/doc/regexp.n @@ -1,18 +1,18 @@ '\" -'\" Copyright (c) 1993 The Regents of the University of California. -'\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 1998 Sun Microsystems, Inc. '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) regexp.n 1.12 96/08/26 13:00:10 +'\" SCCS: @(#) regexp.n 1.15 98/01/22 16:51:23 '\" .so man.macros -.TH regexp n "" Tcl "Tcl Built-In Commands" +.TH regexp n 8.1 Tcl "Tcl Built-In Commands" .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME regexp \- Match a regular expression against a string + .SH SYNOPSIS \fBregexp \fR?\fIswitches\fR? \fIexp string \fR?\fImatchVar\fR? ?\fIsubMatchVar subMatchVar ...\fR? .BE @@ -59,87 +59,478 @@ portion of the expression that wasn't matched), then the corresponding \fIsubMatchVar\fR will be set to ``\fB\-1 \-1\fR'' if \fB\-indices\fR has been specified or to an empty string otherwise. 
-.SH "REGULAR EXPRESSIONS" -.PP -Regular expressions are implemented using Henry Spencer's package -(thanks, Henry!), -and much of the description of regular expressions below is copied verbatim -from his manual entry. -.PP -A regular expression is zero or more \fIbranches\fR, separated by ``|''. -It matches anything that matches one of the branches. -.PP -A branch is zero or more \fIpieces\fR, concatenated. -It matches a match for the first, followed by a match for the second, etc. -.PP -A piece is an \fIatom\fR possibly followed by ``*'', ``+'', or ``?''. -An atom followed by ``*'' matches a sequence of 0 or more matches of the atom. -An atom followed by ``+'' matches a sequence of 1 or more matches of the atom. -An atom followed by ``?'' matches a match of the atom, or the null string. -.PP -An atom is a regular expression in parentheses (matching a match for the -regular expression), a \fIrange\fR (see below), ``.'' -(matching any single character), ``^'' (matching the null string at the -beginning of the input string), ``$'' (matching the null string at the -end of the input string), a ``\e'' followed by a single character (matching -that character), or a single character with no other significance -(matching that character). -.PP -A \fIrange\fR is a sequence of characters enclosed in ``[]''. -It normally matches any single character from the sequence. -If the sequence begins with ``^'', -it matches any single character \fInot\fR from the rest of the sequence. -If two characters in the sequence are separated by ``\-'', this is shorthand -for the full list of ASCII characters between them -(e.g. ``[0-9]'' matches any decimal digit). -To include a literal ``]'' in the sequence, make it the first character -(following a possible ``^''). -To include a literal ``\-'', make it the first or last character. 
+.SH DIFFERENT FLAVORS OF REs +.VS 8.1 +Regular expressions (``RE''s), +as defined by POSIX, come in two flavors: +\fIextended\fR +REs (roughly those of +\fIegrep\fR), +``EREs'', +and +\fIbasic\fR +REs (roughly those of +\fIed\fR), +``BREs''. +This implementation adds a third flavor, +\fIadvanced\fR +REs, ``AREs'', that offer a syntax for specifying BREs and EREs. The +following primarily describes AREs. +.PP +BREs mostly exist for backward compatibility in some old programs; +they will be discussed at the end. +POSIX EREs are +\fIalmost\fR +an exact subset of AREs; +.SH "REGULAR EXPRESSION SYNTAX" +.PP +Regular expressions are implemented using the package written by Henry +Spencer, based on the 1003.2 spec and some (not quite all) of the +Perl5 extensions (thanks, Henry!). Much of the description of regular +expressions below is copied verbatim from his manual entry. +.PP +An ARE is one or more \fIbranches\fR, +separated by `|'. +It matches anything that matches any of the branches. +.PP +A branch is zero or more \fIitems\fR, concatenated. +It matches a match for the first, followed by a match for the second, etc; +an empty branch matches the empty string. +.PP +An item is an \fIatom\fR possibly followed +by a single +\fIquantifier\fR: +`*', `+', `?', or a \fIbound\fR. +An atom followed by `*' matches a sequence of 0 or more matches of the atom. +An atom followed by `+' matches a sequence of 1 or more matches of the atom. +An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. +.PP +A \fIbound\fR is `{' followed by an unsigned decimal integer, +possibly followed by `,' +possibly followed by another unsigned decimal integer, +always followed by `}'. +The integers must lie between 0 and 255 inclusive, +and if there are two of them, the first may not exceed the second. +An atom followed by a bound containing one integer \fIi\fR +and no comma matches +a sequence of exactly \fIi\fR matches of the atom. 
+An atom followed by a bound +containing one integer \fIi\fR and a comma matches +a sequence of \fIi\fR or more matches of the atom. +An atom followed by a bound +containing two integers \fIi\fR and \fIj\fR matches +a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. +.PP +A quantifier followed by `?' (that is, `*?', `+?', `??', `{...}?') is +a \fInon-greedy quantifier\fR, which matches the same possibilities, +but prefers the smallest number rather than the largest number +of matches. +.PP +An atom is a regular expression enclosed in `()' (matching a match for the +regular expression, with the match noted for possible reporting), +an empty set of `()' (matching an empty string, +again noted for possible reporting), +either form with the leading parenthesis replaced by `(?:' +(``non-capturing parentheses'', +as opposed to the plainer ``capturing'' form; +they have the same matching semantics but do no reporting), +a \fIbracket expression\fR (see next section), +`.' (matching any single character), +a \fIconstraint\fR (see below), +a `\e' followed by any non-alphanumeric character +(matching that character taken as an ordinary character, +e.g. `\e\e' matches a backslash character), +an \fIescape\fR (see later section) beginning with `\e' followed by an +alphanumeric character, +or a single character with no other significance (matching that character). +.PP +A \fIconstraint\fR matches an empty string when specific conditions +are met. +(Some more constraints are described later.) +`^' matches at the beginning of a line, +and `$' matches at the end of a line; +these constraints are also known as \fIanchors\fR. +A regular expression enclosed in `(?=' and `)' +(\fIpositive lookahead\fR) matches at any point +where a string matching that regular expression begins. +A regular expression enclosed in `(?!' and `)' +(\fInegative lookahead\fR) matches at any point +where a string \fInot\fR matching that regular expression begins. 
+Lookahead constraints may not contain back references (see later), +and all parentheses within them are treated as non-capturing. +A constraint may not be followed by a quantifier. +.PP +A `{' followed by a character other than a digit is an ordinary +character, not the beginning of a bound. +An RE may not end with an `\e'. +.SH "BRACKET EXPRESSIONS" +A \fIbracket expression\fR is a list of characters enclosed in `[]'. +It normally matches any single character from the list (but see below). +If the list begins with `^', +it matches any single character +(but see below) \fInot\fR from the rest of the list. +If two characters in the list are separated by `\-', this is shorthand +for the full \fIrange\fR of characters between those two (inclusive) in the +collating sequence, +e.g. `[0\-9]' in ASCII matches any decimal digit. +Two ranges may not share an +endpoint, e.g. `a\-c\-e'. +Ranges are very collating-sequence-dependent, +and portable programs should avoid relying on them. +.PP +To include a literal `]' in the list, make it the first character +(following a possible `^'), +or precede it with `\e'. +To include a literal `\-', make it the first or last character, +or the second endpoint of a range, +or precede it with `\e'. +To use a literal `\-' as the first endpoint of a range, +enclose it in `[.' and `.]' to make it a collating element (see below), +or precede it with `\e'. +With the exception of these, some combinations using `[' (see next +paragraphs), and escapes, +all other special characters lose their +special significance within a bracket expression. +.PP +Within a bracket expression, a collating element (a character, +a multi-character sequence that collates as if it were a single character, +or a collating-sequence name for either) +enclosed in `[.' and `.]' stands for the +sequence of characters of that collating element. +The sequence is a single element of the bracket expression's list. 
+A bracket expression in a locale which has +multi-character collating elements +can thus match more than one character. +Most insidiously, if `^' is used, +this can happen even if no collating +elements appear in the bracket expression! +If the collating sequence includes a `ch' collating element, +then the RE `[[.ch.]]*c' matches the first five characters +of `chchcc', +and the RE `[^c]b' matches all of `chb'. +.PP +Within a bracket expression, a collating element enclosed in `[=' and +`=]' is an equivalence class, standing for the sequences of characters +of all collating elements equivalent to that one, including itself. +(If there are no other equivalent collating elements, +the treatment is as if the enclosing delimiters were `[.' and `.]'.) +For example, if o and \o'o^' are the members of an equivalence class, +then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. +An equivalence class may not be an endpoint +of a range. +.PP +Within a bracket expression, the name of a \fIcharacter class\fR enclosed +in `[:' and `:]' stands for the list of all characters belonging to that +class. +Standard character class names are: +.PP +.RS +.nf +.ta 3c 6c 9c +alnum digit punct +alpha graph space +blank lower upper +cntrl print xdigit +.fi +.RE +.PP +These stand for the character classes defined in +\fIctype\fR(3). +A locale may provide others. +A character class may not be used as an endpoint of a range. +.PP +There are two special cases of bracket expressions: +the bracket expressions `[[:<:]]' and `[[:>:]]' +are constraints, matching empty strings at +the beginning and end of a word respectively. +A word is defined as a sequence of +word characters +which is neither preceded nor followed by +word characters. +A word character is an +\fIalnum\fR +character (as defined by +\fIctype\fR(3)) +or an underscore. +This is an extension, and its use is deprecated; +users of AREs should use constraint escapes instead (see below). 
+.SH ESCAPES +Escapes, which begin with a `\e' followed by an alphanumeric character, +come in several varieties: +character entry, class shorthands, constraint, and back references. +A `\e' followed by an alphanumeric character but not constituting +a valid escape is illegal in AREs. +In EREs, there are no escapes: +outside a bracket expression, +a `\e' followed by an alphanumeric character merely stands for that +character as an ordinary character, +and inside a bracket expression, `\e' is an ordinary character. +(The latter is the one actual incompatibility between POSIX and AREs.) +.PP +Character-entry escapes exist to make it easier to specify +non-printing and otherwise inconvenient characters in REs. +The Standard C escapes +`\ea' (alert, aka bell), +`\eb' (backspace), +`\ef' (formfeed), +`\en' (newline), +`\er' (carriage return), +`\et' (horizontal tab), and +`\ev' (vertical tab) +are all available. +`\ee' represents the character whose collating-sequence name +is ``ESC'', or failing that, the character with octal value 033. +`\eE' represents `\e' (reducing the backslash doubling in some +applications where there are multiple levels of backslash processing). +.PP +`\ecX', where X is any character, represents the character whose +low-order 5 bits are the same as those of X, +and whose other bits are all zero. +`\e0', `\eXY' and `\eXYZ', where X, Y, and Z are octal digits (0-7), +represent the characters whose octal values are 0, 0XY, and 0XYZ +respectively. +(See below for why `\eX' isn't in the list here.) +`\exHHH', where HHH is any sequence of hexadecimal digits (0-9, A-F, a-f), +represents the character whose hexadecimal value is 0xHHH +(a single character no matter how many hexadecimal digits are used). +`\euWXYZ', where WXYZ is exactly four hexadecimal digits, +represents the Unicode character U+WXYZ in the local byte ordering. 
+`\eUstuvwxyz', where stuvwxyz is exactly eight hexadecimal digits, +is reserved for a somewhat-hypothetical Unicode extension to 32 bits. +.PP +The character-entry escapes are always taken as ordinary characters. +For example, `\e135' is `]' in ASCII, +but `\e135' does not terminate a bracket expression. +Beware, however, that some applications (e.g., C compilers) interpret +such sequences themselves before the regular-expression package +gets to see them. +.PP +Class-shorthand escapes provide shorthands for certain commonly-used +character classes. +`\ed', `\es', and `\ew', outside bracket expressions, represent +`[[:digit:]]', `[[:space:]]', and `[[:alnum:]_]' respectively +(note the underscore in `\ew'). +`\eD', `\eS', and `\eW', outside bracket expressions, represent +the complementary classes: +`[^[:digit:]]', `[^[:space:]]', and `[^[:alnum:]_]' respectively. +Within bracket expressions, +`\ed', `\es', and `\ew' lose their outer brackets, representing +`[:digit:]', `[:space:]', and `[:alnum:]_' respectively; +`\eD', `\eS', and `\eW' are illegal within bracket expressions. +.PP +A constraint escape is a constraint, +matching the empty string if specific conditions are met, +written as an escape. +`\eA' matches only at the beginning of the string, and +`\eZ' matches only at the end of the string +(see MATCHING, below, for how they differ from `^' and `$'). +`\ey' matches only at the beginning or end of a word, and +`\eY' matches only at a point which is not the beginning or end of a word. +A word is defined as in the specification of [[:<:]] and [[:>:]] above. +Constraint escapes are illegal within bracket expressions. +.PP +A back reference is of the form `\eM', where M is a nonzero digit (1-9), +or `\eMNN', where MNN is some number of +digits (first 1-9, rest 0-9) and the value MNN +is not greater than the number of closing capturing parentheses seen so far. 
+A back reference matches the same string matched by the parenthesized +subexpression specified by the number, +so that (e.g.) `([bc])\e1' matches `bb' or `cc' but not `bc'. +The subexpression must entirely precede the back reference in the RE. +Subexpressions are numbered in the order of their leading parentheses. +Non-capturing parentheses do not define subexpressions. +.PP +There is an inherent historical ambiguity between octal character-entry +escapes and back references, which is resolved by heuristics. +A single non-zero digit, not followed by another digit, +is always taken as a back reference. +A leading zero always indicates an octal escape. +A multi-digit sequence not starting with a zero is taken as a back +reference if it comes after a suitable subexpression +(i.e. the number is in the legal range for a back reference), +and otherwise is taken as octal. +.SH "METASYNTAX" +In addition to the main syntax described above, there are some special +forms and miscellaneous syntactic facilities available. +These are extensions, compatible with but not specified by POSIX. +.PP +Normally the flavor of RE being used is specified by +application-dependent means. +However, this can be overridden by a \fIdirector\fR. +If an RE of any flavor begins with `***:', the rest of the RE is an ARE. +If an RE of any flavor begins with `***=', +the rest of the RE is taken to be a literal string, +with all characters considered ordinary characters. +.PP +An ARE may begin with \fIembedded options\fR: +the sequence `(?xyz)', where xyz is one or more alphabetic characters, +specifies options affecting the rest of the RE. +These supplement, and can override, +options specified by the application in application-specific ways. 
+The available option letters are: +.RS +.IP b 3 +rest of RE is a BRE +.IP c +case-sensitive matching (usual default) +.IP e +rest of RE is an ERE +.IP i +case-insensitive matching (see MATCHING, below) +.IP m +historical synonym for `n' +.IP n +newline-sensitive matching (see MATCHING, below) +.IP p +partial newline-sensitive matching (see MATCHING, below) +.IP q +rest of RE is a literal (``quoted'') string, all ordinary characters +.IP s +non-newline-sensitive matching (usual default) +.IP t +tight syntax (usual default; see below) +.IP w +inverse partial newline-sensitive (``weird'') matching (see MATCHING, below) +.IP x +expanded syntax (see below) +.RE +.PP +In addition to the usual (\fItight\fR) RE syntax, in which all characters are +significant, there is an \fIexpanded\fR syntax, +available in all flavors of RE +by application-specified option, or in AREs by embedded x option. +In the expanded syntax, +any white-space character +not preceded by `\e' +and not within a bracket expression is ignored, +so a complex RE can be paragraphed legibly. +There is one exception: white space is not allowed within multi-character +symbols like the ARE `(?:' or the BRE `\e('. +White-space characters are blank, tab, newline, etc. \- any character +defined as \fIspace\fR by +\fIctype\fR(3). +In addition, all characters between a non-backslashed non-bracket-expression +`#' and the following newline (or the end of the RE) are ignored, +so comments can be inserted conveniently. +Exactly how a multi-line expanded-syntax RE can be entered by a user, +if at all, is application-specific; +this is primarily a programming facility. +.PP +Finally, in an ARE, +outside bracket expressions, the sequence `(?#ttt)', +where ttt is any text not containing a `)', is a comment, +completely ignored. +Again, this is not allowed between the characters of +multi-character symbols like `(?:'. 
+Such comments are more a historical artifact than a useful facility, +and their use is deprecated; +use the expanded syntax instead. +.PP +\fINone\fR of these metasyntax extensions are available if the application +(or an initial `***=' director) +has specified that the user's input be treated as a literal string +rather than as an RE. .SH "CHOOSING AMONG ALTERNATIVE MATCHES" +In the event that an RE could match more than one substring of a given +string, +the RE matches the one starting earliest in the string. +If the RE could match more than one substring starting at that point, +it matches the longest. +Subexpressions also match the longest possible substrings, subject to +the constraint that the whole match be as long as possible, +with subexpressions starting earlier in the RE taking priority over +ones starting later. +Note that higher-level subexpressions thus take priority over +their lower-level component subexpressions. .PP -In general there may be more than one way to match a regular expression -to an input string. For example, consider the command -.CS -\fBregexp (a*)b* aabaaabb x y\fR -.CE -Considering only the rules given so far, \fBx\fR and \fBy\fR could -end up with the values \fBaabb\fR and \fBaa\fR, \fBaaab\fR and \fBaaa\fR, -\fBab\fR and \fBa\fR, or any of several other combinations. -To resolve this potential ambiguity \fBregexp\fR chooses among -alternatives using the rule ``first then longest''. -In other words, it considers the possible matches in order working -from left to right across the input string and the pattern, and it -attempts to match longer pieces of the input string before shorter -ones. More specifically, the following rules apply in decreasing -order of priority: -.IP [1] -If a regular expression could match two different parts of an input string -then it will match the one that begins earliest. -.IP [2] -If a regular expression contains \fB|\fR operators then the leftmost -matching sub-expression is chosen. 
-.IP [3] -In \fB*\fR, \fB+\fR, and \fB?\fR constructs, longer matches are chosen -in preference to shorter ones. -.IP [4] -In sequences of expression components the components are considered -from left to right. -.LP -In the example from above, \fB(a*)b*\fR matches \fBaab\fR: the \fB(a*)\fR -portion of the pattern is matched first and it consumes the leading -\fBaa\fR; then the \fBb*\fR portion of the pattern consumes the -next \fBb\fR. Or, consider the following example: -.CS -\fBregexp (ab|a)(b*)c abc x y z\fR -.CE -After this command \fBx\fR will be \fBabc\fR, \fBy\fR will be -\fBab\fR, and \fBz\fR will be an empty string. -Rule 4 specifies that \fB(ab|a)\fR gets first shot at the input -string and Rule 2 specifies that the \fBab\fR sub-expression -is checked before the \fBa\fR sub-expression. -Thus the \fBb\fR has already been claimed before the \fB(b*)\fR -component is checked and \fB(b*)\fR must match an empty string. +Match lengths are measured in characters, not collating elements. +An empty string is considered longer than no match at all. +For example, +`bb*' matches the three middle characters of `abbbc', +`(week|wee)(night|knights)' matches all ten characters of `weeknights', +when `(.*).*' is matched against `abc' the parenthesized subexpression +matches all three characters, and +when `(a*)*' is matched against `bc' both the whole RE and the parenthesized +subexpression match an empty string. +.PP +If case-independent matching is specified, +the effect is much as if all case distinctions had vanished from the +alphabet. +When an alphabetic that exists in multiple cases appears as an +ordinary character outside a bracket expression, it is effectively +transformed into a bracket expression containing both cases, +e.g. `x' becomes `[xX]'. +When it appears inside a bracket expression, all case counterparts +of it are added to the bracket expression, so that (e.g.) `[x]' +becomes `[xX]' and `[^x]' becomes `[^xX]'. 
+.PP +If newline-sensitive matching is specified, +and the string supplied for matching contains newlines, +`.' and complemented bracket expressions (`[^...]') +will never match the newline character, +and `^' and `$' will match the empty string after and before a newline +respectively, in addition to matching at beginning and end of string +respectively. +ARE `\eA' and `\eZ' continue to match beginning or end of string \fIonly\fR. +.PP +If partial newline-sensitive matching is specified, +this affects `.' and complemented bracket expressions as with +newline-sensitive matching, but not `^' and `$'. +.PP +If inverse partial newline-sensitive matching is specified, +this affects `^' and `$' as with +newline-sensitive matching, +but not `.' and complemented bracket expressions. +This isn't very useful but is provided for symmetry. + +.SH BASIC REGULAR EXPRESSIONS +BREs differ from EREs in several respects. +`|', `+', and `?' are ordinary characters and there is no equivalent +for their functionality. +The delimiters for bounds are `\e{' and `\e}', +with `{' and `}' by themselves ordinary characters. +The parentheses for nested subexpressions are `\e(' and `\e)', +with `(' and `)' by themselves ordinary characters. +`^' is an ordinary character except at the beginning of the +RE or the beginning of a parenthesized subexpression, +`$' is an ordinary character except at the end of the +RE or the end of a parenthesized subexpression, +and `*' is an ordinary character if it appears at the beginning of the +RE or the beginning of a parenthesized subexpression +(after a possible leading `^'). +Finally, +single-digit back references (but no other escapes) are available. +.SH "LIMITS AND BACKWARD COMPATIBILITY" +No particular limit is imposed on the length of REs or the number of +paired parentheses, brackets, or braces. +.PP +In AREs, a `\e' inside [...] is an escape, +so a literal `\e' within `[]' must be written `\e\e'. 
+.PP +In AREs, some escapes mean special things in a bracket expression +(i.e. `\ed', `\es', `\ew') and others are illegal (i.e. `\eD', `\eS', +`\eW', `\eA', `\eZ'). +.PP +In AREs, a `{' followed by a digit will not match those two characters +but will instead start a bound. Such sequences should be rare, and +will often result in an error because following characters will not +look like a valid bound. +.PP +In AREs, a `\e' followed by an alphanumeric character is either an +escape or an error. A bunch of new escapes were treated as literal +characters in old versions of Tcl. +.PP +The longest-leftmost match is found in AREs. Old versions of Tcl +found the first-leftmost match. This may affect some old REs which +were written in the expectation that the first match would be +reported. The careful crafting of old REs to optimize the search +order for fast matching is obsolete. AREs examine all possible +matches in parallel, and their performance is largely insensitive to +their complexity, but cases where the search order was exploited to +deliberately find a match which was \fInot\fR the longest will need +rewriting. +.VE .SH KEYWORDS match, regular expression, string diff --git a/doc/resource.n b/doc/resource.n index 0062992..05a69e1 100644 --- a/doc/resource.n +++ b/doc/resource.n @@ -3,7 +3,7 @@ '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. -'\" SCCS: @(#) resource.n 1.4 97/09/10 15:22:18 +'\" SCCS: @(#) resource.n 1.5 98/01/28 12:47:09 '\" .so man.macros .TH resource n 8.0 Tcl "Tcl Built-In Commands" @@ -55,7 +55,7 @@ If the \fB-file\fR option is specified then the resource will be deleted from the file pointed to by \fIresourceRef\fR. Otherwise the first resource with the given \fIresourceName\fR and or \fIresourceId\fR which is found on the resource file path will be -deleted. To inspect the file path, use the \fIresource files\fB command. +deleted. 
To inspect the file path, use the \fIresource files\fR command. .RE .TP \fBresource files ?\fIresourceRef\fR? diff --git a/doc/safe.n b/doc/safe.n index 3be9c5f..57ee651 100644 --- a/doc/safe.n +++ b/doc/safe.n @@ -4,16 +4,15 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" SCCS: @(#) safe.n 1.11 97/10/31 12:51:13 +'\" SCCS: @(#) safe.n 1.12 98/01/28 12:46:57 '\" .so man.macros .TH "Safe Tcl" n 8.0 Tcl "Tcl Built-In Commands" .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME -Safe Base \- A mechanism for creating and manipulating safe interpreters. +Safe\ Base \- A mechanism for creating and manipulating safe interpreters. .SH SYNOPSIS -.PP \fB::safe::interpCreate\fR ?\fIslave\fR? ?\fIoptions...\fR? .sp \fB::safe::interpInit\fR \fIslave\fR ?\fIoptions...\fR? diff --git a/generic/chr.h b/generic/chr.h new file mode 100644 index 0000000..03d4157 --- /dev/null +++ b/generic/chr.h @@ -0,0 +1,48 @@ +/* + * chr.h -- + * + * Regexp package file: Unichar version of stuff related to the + * nature of a character. + * + * Copyright (c) 1998 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., and Sun Microsystems Inc., none of + * whom are responsible for the results. The author thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (c) 1998 by Sun Microsystems, Inc. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * SCCS: @(#) chr.h 1.4 98/01/21 14:32:38 + */ + +typedef Tcl_UniChar chr; /* internal character type */ +typedef int pchr; /* what it promotes to */ +typedef unsigned uchr; /* unsigned type big enough to hold a chr */ +#define CHRBITS (sizeof(Tcl_UniChar) * CHAR_BIT) /* bits in a chr */ +#define CHR(c) (UCHAR(c)) /* turn a char literal into a chr literal */ +#define DIGITVAL(c) ((c)-'0') /* turn a chr digit into its value */ + +/* + * char names for the externally-visible functions + */ +#define compile re_ucomp +#define exec re_uexec diff --git a/generic/color.c b/generic/color.c new file mode 100644 index 0000000..da0bd66 --- /dev/null +++ b/generic/color.c @@ -0,0 +1,605 @@ +/* + * color.c -- + * + * Regexp package file: colorings of characters. + * Note that there are some incestuous relationships between this code and + * NFA arc maintenance, which perhaps ought to be cleaned up sometime. + * + * Copyright (c) 1998 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., and Sun Microsystems Inc., none of + * whom are responsible for the results. The author thanks all of them. 
+ * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (c) 1998 by Sun Microsystems, Inc. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * SCCS: @(#) color.c 1.10 98/02/11 17:23:09 + */ + +/* + * The innards: the two kinds of block that make up the colormap tree. + */ +struct colors { + color ccolor[BYTTAB]; /* bottom-level block: getcolor() returns an entry of this once shift reaches 0 */ +}; +struct ptrs { + union tree *pptr[BYTTAB]; /* upper-level block: child block for each BYTMASK-masked slice of the chr */ +}; +union tree { + struct colors colors; + struct ptrs ptrs; +}; +#define tcolor colors.ccolor +#define tptr ptrs.pptr +/* + * Some of the function prototypes need this. 
+ ^ union tree; + */ + +struct colordesc { + uchr nchrs; /* number of chars of this color */ + color sub; /* open subcolor of this one, or NOSUB */ +# define NOSUB COLORLESS + struct arc *arcs; /* color chain */ +# define UNUSEDCOLOR(cd) ((cd)->nchrs == 0 && (cd)->sub == NOSUB) + int flags; /* bit flags; PSEUDO is the only one defined here */ +# define PSEUDO 1 /* pseudocolor, no real chars */ +}; + +struct colormap { + int magic; /* CMMAGIC while the map is live; freecm() zeroes it */ +# define CMMAGIC 0x876 + struct vars *v; /* for error reporting */ + color rest; /* color getcolor() reports for tree parts not yet allocated */ + int filled; /* has it been filled? */ + int ncds; /* number of colordescs */ + struct colordesc *cd; /* current colordesc array; newcm() points it at cds[] */ +# define CDEND(cm) (&(cm)->cd[(cm)->ncds]) +# define NINLINECDS 10 + struct colordesc cds[NINLINECDS]; /* inline storage for the initial colordescs */ + union tree tree[NBYTS]; /* tree top, plus fill blocks */ +}; + +#ifdef COMPILE + +/* + - newcm - get new colormap, with every colordesc unused and the last fill block solid WHITE + ^ static struct colormap *newcm(struct vars *); + */ +static struct colormap * /* NULL for allocation failure */ +newcm(v) +struct vars *v; +{ + struct colormap *cm; + int i; + int j; + union tree *t; + union tree *nextt; + struct colordesc *cd; + + cm = (struct colormap *)ckalloc(sizeof(struct colormap)); + if (cm == NULL) { + ERR(REG_ESPACE); + return NULL; + } + cm->magic = CMMAGIC; + cm->v = v; + cm->rest = WHITE; + cm->filled = 0; + + cm->ncds = NINLINECDS; + cm->cd = cm->cds; + for (cd = cm->cd; cd < CDEND(cm); cd++) { /* reset every inline colordesc to the UNUSEDCOLOR() state */ + cd->nchrs = 0; + cd->sub = NOSUB; + cd->arcs = NULL; + cd->flags = 0; + } + cm->cd[WHITE].nchrs = WCHAR_MAX - WCHAR_MIN; /* NOTE(review): chr is Tcl_UniChar (see chr.h), not wchar_t -- confirm this is the intended range */ + + /* treetop starts as NULLs if there are lower levels */ + t = cm->tree; + if (NBYTS > 1) { + for (i = BYTTAB-1; i >= 0; i--) + t->tptr[i] = NULL; + } + + /* if no lower levels, treetop and last fill block are the same */ + + /* fill blocks point to next fill block... 
*/ + for (t = &cm->tree[1], j = NBYTS-2; j > 0; t = nextt, j--) { + nextt = t + 1; + for (i = BYTTAB-1; i >= 0; i--) + t->tptr[i] = t + 1; + } + /* ...except last which is solid white */ + t = &cm->tree[NBYTS-1]; + for (i = BYTTAB-1; i >= 0; i--) + t->tcolor[i] = WHITE; + + + return cm; +} + +/* + - freecm - free a colormap: its allocated tree blocks, any relocated colordesc array, and the map itself + ^ static VOID freecm(struct colormap *); + */ +static VOID +freecm(cm) +struct colormap *cm; +{ + cm->magic = 0; /* invalidate: getcolor() and setcolor() assert magic == CMMAGIC */ + if (NBYTS > 1) { + cmtreefree(cm, cm->tree, 0); + } + if (cm->cd != cm->cds) { /* colordescs no longer live in the inline cds[] array */ + ckfree((char *)cm->cd); + } + ckfree((char *) cm); /* mem leak (CCS). -- releases the map itself; presumably this call was added to fix a leak, confirm */ +} + +/* + - cmtreefree - free a non-terminal part of a colormap tree + ^ static VOID cmtreefree(struct colormap *, union tree *, int); + */ +static VOID +cmtreefree(cm, tree, level) +struct colormap *cm; +union tree *tree; +int level; /* level number (top == 0) of this block */ +{ + int i; + union tree *t; + union tree *fillt = &cm->tree[level+1]; + + assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { + t = tree->tptr[i]; + if (t != NULL && t != fillt) { /* fillt is an element of cm->tree[], so it is never passed to ckfree */ + if ((int) level < (int) NBYTS-2) { /* more pointer blocks below */ + cmtreefree(cm, t, level+1); + } + ckfree((char *) t); + } + } +} + +/* + - fillcm - fill in a colormap, so no NULLs remain + * The point of this is that the tree traversal can then be a fixed set + * of table lookups with no conditional branching. It might be better + * to do reallocation for a more compacted structure, on the order of + * what's done for NFAs, but the colormap can be quite large and a total + * rebuild of it could be costly. 
+ ^ static VOID fillcm(struct colormap *); + */ +static VOID +fillcm(cm) +struct colormap *cm; +{ + if (!cm->filled && NBYTS > 1) + cmtreefill(cm, cm->tree, 0); + cm->filled = 1; /* set even when NBYTS == 1, where no fill pass is run */ +} + +/* + - cmtreefill - fill a non-terminal part of a colormap tree + ^ static VOID cmtreefill(struct colormap *, union tree *, int); + */ +static VOID +cmtreefill(cm, tree, level) +struct colormap *cm; +union tree *tree; +int level; /* level number (top == 0) of this block */ +{ + int i; + union tree *t; + union tree *fillt = &cm->tree[level+1]; + + assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { + t = tree->tptr[i]; + if (t == fillt) /* oops: slot already points at the fill block; leave it alone */ + {} + else if (t == NULL) { /* nothing allocated here: share the fill block for the next level */ + tree->tptr[i] = fillt; + } + else if ((int) level < (int) NBYTS-2) {/* more pointer blocks below */ + cmtreefill(cm, t, level+1); + } + } +} + +#endif /* ifdef COMPILE */ + +/* + - getcolor - get the color of a character from a colormap (cm->rest if its part of the tree is missing) + ^ static color getcolor(struct colormap *, pchr); + */ +static color +getcolor(cm, c) +struct colormap *cm; +pchr c; +{ + uchr uc = c; + int shift; + int b; + union tree *t; + + assert(cm->magic == CMMAGIC); + + t = cm->tree; + for (shift = BYTBITS * (NBYTS - 1); t != NULL; shift -= BYTBITS) { /* one BYTBITS-wide slice of uc per level, most significant first */ + b = (uc >> shift) & BYTMASK; + if (shift == 0) /* reached the bottom */ + return t->tcolor[b]; + t = t->tptr[b]; + } + + /* we fell off an incomplete part of the tree */ + assert(!cm->filled); + return cm->rest; +} + +#ifdef COMPILE + +/* + - setcolor - set the color of a character in a colormap, allocating missing tree blocks on the way down + ^ static color setcolor(struct colormap *, pchr, pcolor); + */ +static color /* previous color */ +setcolor(cm, c, co) +struct colormap *cm; +pchr c; +pcolor co; +{ + uchr uc = c; + int shift; + int i; + int b; + int bottom; + union tree *t; + union tree *lastt; + color prev; + + assert(cm->magic == CMMAGIC); + if (VISERR(cm->v) || co == COLORLESS) /* error already pending, or co is the COLORLESS error value */ + return COLORLESS; + + t = cm->tree; + for (shift = BYTBITS * (NBYTS - 1); shift > 0; shift -= BYTBITS) {
+ b = (uc >> shift) & BYTMASK; + lastt = t; + t = t->tptr[b]; + if (t == NULL) { /* fell off an incomplete part */ + bottom = (shift <= BYTBITS) ? 1 : 0; /* is the missing block on the last level, i.e. one that holds colors? */ + t = (union tree *)ckalloc((bottom) ? + sizeof(struct colors) : sizeof(struct ptrs)); + if (t == NULL) { + VERR(cm->v, REG_ESPACE); + return COLORLESS; + } + if (bottom) + for (i = BYTTAB-1; i >= 0; i--) + t->tcolor[i] = cm->rest; /* same default getcolor() reports for a missing block */ + else + for (i = BYTTAB-1; i >= 0; i--) + t->tptr[i] = NULL; + lastt->tptr[b] = t; + } + } + assert(shift == 0 && t != NULL); /* we hit bottom; it's there */ + + b = uc & BYTMASK; + prev = t->tcolor[b]; + t->tcolor[b] = (color) co; /* NOTE(review): once fillcm() has run, t can be a shared fill block and this write would recolor every chr routed through it -- confirm setcolor is never called on a filled map */ + return prev; +} + +/* + - maxcolor - report largest color number in use + ^ static color maxcolor(struct colormap *); + */ +static color +maxcolor(cm) +struct colormap *cm; +{ + struct colordesc *cd; + struct colordesc *end; + struct colordesc *lastused; + + if (VISERR(cm->v)) + return COLORLESS; + + lastused = NULL; + end = CDEND(cm); + for (cd = cm->cd; cd < end; cd++) /* keep the highest-indexed colordesc that is not UNUSEDCOLOR() */ + if (!UNUSEDCOLOR(cd)) + lastused = cd; + assert(lastused != NULL); + return (color) (lastused - cm->cd); +} + +/* + - newcolor - find a new color (must be subject of setcolor at once) + * Beware: may relocate the colordescs. 
+ ^ static color newcolor(struct colormap *);
+ */
+static color			/* COLORLESS for error */
+newcolor(cm)
+struct colormap *cm;
+{
+	struct colordesc *cd;
+	struct colordesc *end;
+	struct colordesc *firstnew;
+	int n;
+
+	if (VISERR(cm->v))
+		return COLORLESS;
+
+	end = CDEND(cm);
+	for (cd = cm->cd; cd < end; cd++)	/* reuse the first unused descriptor, if any */
+		if (UNUSEDCOLOR(cd)) {
+			assert(cd->arcs == NULL);
+			return (color) (cd - cm->cd);
+		}
+
+	/* oops, must allocate more */
+	n = cm->ncds * 2;	/* double the descriptor vector */
+	if (cm->cd == cm->cds) {	/* still on cm->cds: allocate afresh and copy instead of realloc */
+		cd = (struct colordesc *)ckalloc(sizeof(struct colordesc) * n);
+		if (cd != NULL)
+			memcpy((VOID *)cd, (VOID *)cm->cds, cm->ncds *
+						sizeof(struct colordesc));
+	} else {
+		cd = (struct colordesc *)ckrealloc((VOID *)cm->cd,
+						sizeof(struct colordesc) * n);
+	}
+	if (cd == NULL) {
+		VERR(cm->v, REG_ESPACE);
+		return COLORLESS;
+	}
+	cm->cd = cd;
+	firstnew = CDEND(cm);	/* taken before ncds grows; presumably CDEND is cm->cd + cm->ncds -- confirm against its definition */
+	cm->ncds = n;
+	end = CDEND(cm);
+	for (cd = firstnew; cd < end; cd++) {	/* initialize only the added descriptors */
+		cd->nchrs = 0;
+		cd->sub = NOSUB;
+		cd->arcs = NULL;
+		cd->flags = 0;
+	}
+	assert(firstnew < CDEND(cm) && UNUSEDCOLOR(firstnew));
+	return (color) (firstnew - cm->cd);
+}
+
+/*
+ - pseudocolor - allocate a false color, to be managed by other means
+ * Takes a descriptor from newcolor(), sets its nchrs to 1 and flags it
+ * PSEUDO.  Returns COLORLESS if an error is recorded.  rainbow() skips
+ * descriptors flagged PSEUDO.
+ ^ static color pseudocolor(struct colormap *);
+ */
+static color
+pseudocolor(cm)
+struct colormap *cm;
+{
+	color co;
+
+	co = newcolor(cm);
+	if (VISERR(cm->v))
+		return COLORLESS;
+	cm->cd[co].nchrs = 1;
+	cm->cd[co].flags = PSEUDO;
+	return co;
+}
+
+/*
+ - subcolor - allocate a new subcolor (if necessary) to this chr
+ * Returns the color c ends up with, or COLORLESS if a new color was
+ * needed and newcolor() failed.  If c is the only chr of its color, or
+ * its color is already a subcolor (sub points at itself), the current
+ * color is returned and nothing changes.  Otherwise c is moved from its
+ * color to that color's subcolor and the chr counts are adjusted.
+ ^ static color subcolor(struct colormap *, pchr c);
+ */
+static color
+subcolor(cm, c)
+struct colormap *cm;
+pchr c;
+{
+	color co;			/* current color of c */
+	color sco;			/* new subcolor */
+
+	co = getcolor(cm, c);
+	sco = cm->cd[co].sub;
+	if (sco == NOSUB) {		/* must create subcolor */
+		if (cm->cd[co].nchrs == 1)	/* shortcut */
+			return co;
+		sco = newcolor(cm);	/* may relocate cm->cd, hence indexing below rather than saved pointers */
+		if (sco == COLORLESS)
+			return COLORLESS;
+		cm->cd[co].sub = sco;
+		cm->cd[sco].sub = sco;	/* self-referential subcolor ptr */
+	}
+
+	if (co == sco)		/* repeated character */
+		return co;	/* no further action needed */
+	cm->cd[co].nchrs--;
+	cm->cd[sco].nchrs++;
+	setcolor(cm, c, sco);
+	return sco;
+}
+
+/*
+ - okcolors - promote subcolors to full colors
+ * For every color that has a distinct subcolor: if the parent has no
+ * chrs left, its arcs are moved onto the subcolor's chain and recolored;
+ * otherwise each parent arc gains a parallel arc in the subcolor.  In
+ * both cases the sub links of parent and subcolor are reset to NOSUB.
+ ^ static VOID okcolors(struct nfa *, struct colormap *);
+ */
+static VOID
+okcolors(nfa, cm)
+struct nfa *nfa;
+struct colormap *cm;
+{
+	struct colordesc *cd;
+	struct colordesc *end = CDEND(cm);
+	struct colordesc *scd;
+	struct arc *a;
+	color co;
+	color sco;
+
+	for (cd = cm->cd, co = 0; cd < end; cd++, co++) {
+		sco = cd->sub;
+		if (sco == NOSUB) {
+			/* has no subcolor, no further action */
+		} else if (sco == co) {
+			/* is subcolor, let parent deal with it */
+		} else if (cd->nchrs == 0) {
+			/* parent empty, its arcs change color to subcolor */
+			cd->sub = NOSUB;
+			scd = &cm->cd[sco];
+			assert(scd->nchrs > 0);
+			assert(scd->sub == sco);
+			scd->sub = NOSUB;
+			while ((a = cd->arcs) != NULL) {	/* pop from parent chain, push on subcolor chain */
+				assert(a->co == co);
+				/* uncolorchain(cm, a); */
+				cd->arcs = a->colorchain;
+				a->co = sco;
+				/* colorchain(cm, a); */
+				a->colorchain = scd->arcs;
+				scd->arcs = a;
+			}
+		} else {
+			/* parent's arcs must gain parallel subcolor arcs */
+			cd->sub = NOSUB;
+			scd = &cm->cd[sco];
+			assert(scd->nchrs > 0);
+			assert(scd->sub == sco);
+			scd->sub = NOSUB;
+			for (a = cd->arcs; a != NULL; a = a->colorchain) {
+				assert(a->co == co);
+				newarc(nfa, a->type, sco, a->from, a->to);
+			}
+		}
+	}
+}
+
+/*
+ - colorchain - add this arc to the color chain of its color
+ * The arc is pushed on the front of the chain of cm->cd[a->co].
+ ^ static VOID colorchain(struct colormap *, struct arc *);
+ */
+static VOID
+colorchain(cm, a)
+struct colormap *cm;
+struct arc *a;
+{
+	struct colordesc *cd = &cm->cd[a->co];
+
+	a->colorchain = cd->arcs;
+	cd->arcs = a;
+}
+
+/*
+ - uncolorchain - delete this arc from the color chain of its color
+ * The arc must be on the chain of cm->cd[a->co]; the assertion fires if
+ * the search runs off the end without finding it.
+ ^ static VOID uncolorchain(struct colormap *, struct arc *);
+ */
+static VOID
+uncolorchain(cm, a)
+struct colormap *cm;
+struct arc *a;
+{
+	struct colordesc *cd = &cm->cd[a->co];
+	struct arc *aa;
+
+	aa = cd->arcs;
+	if (aa == a)		/* easy case */
+		cd->arcs = a->colorchain;
+	else {
+		for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain)	/* find a's predecessor */
+			continue;
+		assert(aa != NULL);
+		aa->colorchain = a->colorchain;
+	}
+	a->colorchain = NULL;	/* paranoia */
+}
+
+/*
+ - singleton - is this character in its own color?
+ * True only when c's color has exactly one chr and no subcolor link.
+ ^ static int singleton(struct colormap *, pchr c);
+ */
+static int			/* predicate */
+singleton(cm, c)
+struct colormap *cm;
+pchr c;
+{
+	color co;			/* color of c */
+
+	co = getcolor(cm, c);
+	if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB)
+		return 1;
+	return 0;
+}
+
+/*
+ - rainbow - add arcs of all full colors (but one) between specified states
+ * Skipped: unused descriptors, colors that are their own subcolor, the
+ * excepted color exc, and colors flagged PSEUDO.  The scan stops early
+ * once an error is recorded in nfa->v.
+ ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor,
+ ^ 	struct state *, struct state *);
+ */
+static VOID
+rainbow(nfa, cm, type, exc, from, to)
+struct nfa *nfa;
+struct colormap *cm;
+int type;
+pcolor exc;			/* COLORLESS if no exceptions */
+struct state *from;
+struct state *to;
+{
+	struct colordesc *cd;
+	struct colordesc *end = CDEND(cm);
+	color co;
+
+	for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++)
+		if (!UNUSEDCOLOR(cd) && cd->sub != co && co != exc &&
+							!(cd->flags&PSEUDO))
+			newarc(nfa, type, co, from, to);
+}
+
+/*
+ - colorcomplement - add arcs of complementary colors
+ * The calling sequence ought to be reconciled with cloneouts().
+ ^ static VOID colorcomplement(struct nfa *, struct colormap *, int,
+ ^ 	struct state *, struct state *, struct state *);
+ */
+static VOID
+colorcomplement(nfa, cm, type, of, from, to)
+struct nfa *nfa;
+struct colormap *cm;
+int type;			/* arc type given to every arc added */
+struct state *of;		/* complements of this guy's PLAIN outarcs */
+struct state *from;		/* new arcs run from here... */
+struct state *to;		/* ...to here */
+{
+	struct colordesc *cd;
+	struct colordesc *end = CDEND(cm);
+	color co;
+
+	assert(of != from);
+	for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++)	/* stops early once an error is recorded */
+		if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO))	/* only colors in use that are not PSEUDO */
+			if (findarc(of, PLAIN, co) == NULL)	/* `of' has no PLAIN outarc of this color */
+				newarc(nfa, type, co, from, to);
+}
+
+#endif				/* ifdef COMPILE */
diff --git a/generic/compile.c b/generic/compile.c
new file mode 100644
index 0000000..0649be6
--- /dev/null
+++ b/generic/compile.c
@@ -0,0 +1,2089 @@
+/*
+ * compile.c --
+ *
+ * Regexp package file: re_*comp and friends - compile REs
+ *
+ * Copyright (c) 1998 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
+ * whom are responsible for the results. The author thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (c) 1998 by Sun Microsystems, Inc. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * SCCS: @(#) compile.c 1.12 98/02/11 17:25:30 + */ + +#include "tclInt.h" +#include +#include "tclPort.h" +#include "tclRegexp.h" +#include "chr.h" +#include "guts.h" + +/* + * forward declarations, up here so forward datatypes etc. are defined early + */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ +/* === compile.c === */ +int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int)); +static VOID moresubs _ANSI_ARGS_((struct vars *, int)); +static int freev _ANSI_ARGS_((struct vars *, int)); +static struct rtree *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int)); +static int scannum _ANSI_ARGS_((struct vars *)); +static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int)); +static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); +static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); +static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *)); +static chr *scanplain _ANSI_ARGS_((struct vars *)); +static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *)); +static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *)); +static VOID 
dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *)); +static color nlcolor _ANSI_ARGS_((struct vars *)); +static VOID wordchrs _ANSI_ARGS_((struct vars *)); +static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *)); +static struct rtree *newrt _ANSI_ARGS_((struct vars *)); +static VOID freert _ANSI_ARGS_((struct rtree *)); +static VOID freertnode _ANSI_ARGS_((struct rtree *)); +static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *)); +static int numrt _ANSI_ARGS_((struct rtree *, int)); +static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *)); +static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *)); +static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int)); +static VOID freelacons _ANSI_ARGS_((struct subre *, int)); +static VOID rfree _ANSI_ARGS_((regex_t *)); +static VOID dump _ANSI_ARGS_((regex_t *, FILE *)); +static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int)); +static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int)); +/* === lex.c === */ +static VOID lexstart _ANSI_ARGS_((struct vars *)); +static VOID prefixes _ANSI_ARGS_((struct vars *)); +static VOID lexnest _ANSI_ARGS_((struct vars *, chr *)); +static VOID lexword _ANSI_ARGS_((struct vars *)); +static int next _ANSI_ARGS_((struct vars *)); +static int lexescape _ANSI_ARGS_((struct vars *)); +static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int)); +static int brenext _ANSI_ARGS_((struct vars *, pchr)); +static VOID skip _ANSI_ARGS_((struct vars *)); +static chr newline _ANSI_ARGS_((VOID)); +static chr *ch _ANSI_ARGS_((VOID)); +static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, pchr)); +/* === locale.c === */ +#define MAXCE 2 /* longest CE code is prepared to handle */ +typedef wint_t celt; /* type holding distinct codes for all chrs, all CEs */ +static int nces _ANSI_ARGS_((struct vars *)); +static int nleaders _ANSI_ARGS_((struct vars *)); +static 
struct cvec *allces _ANSI_ARGS_((struct vars *, struct cvec *)); +static celt element _ANSI_ARGS_((struct vars *, chr *, chr *)); +static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int)); +static int before _ANSI_ARGS_((celt, celt)); +static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int)); +static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int)); +static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr)); +static int sncmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); +static struct cvec *newcvec _ANSI_ARGS_((int, int)); +static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); +static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); +static VOID addce _ANSI_ARGS_((struct cvec *, chr *)); +static int haschr _ANSI_ARGS_((struct cvec *, pchr)); +static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int)); +static VOID freecvec _ANSI_ARGS_((struct cvec *)); +/* === color.c === */ +union tree; +static struct colormap *newcm _ANSI_ARGS_((struct vars *)); +static VOID freecm _ANSI_ARGS_((struct colormap *)); +static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int)); +static VOID fillcm _ANSI_ARGS_((struct colormap *)); +static VOID cmtreefill _ANSI_ARGS_((struct colormap *, union tree *, int)); +static color getcolor _ANSI_ARGS_((struct colormap *, pchr)); +static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor)); +static color maxcolor _ANSI_ARGS_((struct colormap *)); +static color newcolor _ANSI_ARGS_((struct colormap *)); +static color pseudocolor _ANSI_ARGS_((struct colormap *)); +static color subcolor _ANSI_ARGS_((struct colormap *, pchr c)); +static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *)); +static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *)); +static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *)); +static int singleton _ANSI_ARGS_((struct colormap *, pchr c)); +static VOID rainbow _ANSI_ARGS_((struct nfa *, struct 
colormap *, int, pcolor, struct state *, struct state *)); +static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *)); +/* === nfa.c === */ +static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct nfa *)); +static VOID freenfa _ANSI_ARGS_((struct nfa *)); +static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); +static struct state *newstate _ANSI_ARGS_((struct nfa *)); +static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *)); +static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *)); +static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); +static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *)); +static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int)); +static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *)); +static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID specialcolors _ANSI_ARGS_((struct nfa *)); 
+static VOID optimize _ANSI_ARGS_((struct nfa *)); +static VOID pullback _ANSI_ARGS_((struct nfa *)); +static int pull _ANSI_ARGS_((struct nfa *, struct arc *)); +static VOID pushfwd _ANSI_ARGS_((struct nfa *)); +static int push _ANSI_ARGS_((struct nfa *, struct arc *)); +#define INCOMPATIBLE 1 /* destroys arc */ +#define SATISFIED 2 /* constraint satisfied */ +#define COMPATIBLE 3 /* compatible but not satisfied yet */ +static int combine _ANSI_ARGS_((struct arc *, struct arc *)); +static VOID fixempties _ANSI_ARGS_((struct nfa *)); +static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); +static VOID cleanup _ANSI_ARGS_((struct nfa *)); +static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); +static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); +static int analyze _ANSI_ARGS_((struct vars *, struct nfa *)); +static int isempty _ANSI_ARGS_((struct state *, struct state *)); +static VOID compact _ANSI_ARGS_((struct vars *, struct nfa *, struct cnfa *)); +static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *)); +static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int)); +static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *)); +static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + + + +/* internal variables, bundled for easy passing around */ +struct vars { + regex_t *re; + chr *now; /* scan pointer into string */ + chr *stop; /* end of string */ + chr *savenow; /* saved now and stop for "subroutine call" */ + chr *savestop; + int err; /* error code (0 if none) */ + int cflags; /* copy of compile flags */ + int lasttype; /* type of previous token */ + int nexttype; /* type of next token */ + chr nextvalue; /* value (if any) of next token */ + int lexcon; /* lexical context type (see lex.c) */ + int nsubexp; /* subexpression count */ + struct subre 
**subs; /* subRE pointer vector */ + size_t nsubs; /* length of vector */ + struct subre *sub10[10]; /* initial vector, enough for most */ + struct nfa *nfa; /* the NFA */ + struct colormap *cm; /* character color map */ + color nlcolor; /* color of newline */ + struct state *wordchrs; /* state in nfa holding word-char outarcs */ + struct rtree *tree; /* subexpression tree */ + int ntree; /* number of tree nodes */ + struct cvec *cv; /* utility cvec */ + struct cvec *ces; /* collating-element information */ +# define ISCELEADER(v,c) (v->ces != NULL && haschr(v->ces, (c))) + struct state *cepbegin; /* state in nfa, start of CE prototypes */ + struct state *cepend; /* state in nfa, end of CE prototypes */ + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ + int usedshorter; /* used short-preferring quantifiers */ +}; + +/* parsing macros; most know that `v' is the struct vars pointer */ +#define NEXT() (next(v)) /* advance by one token */ +#define SEE(t) (v->nexttype == (t)) /* is next token this? */ +#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ + ((vv)->err = (e))) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return;} /* if error seen, return */ +#define NOERRN() {if (ISERR()) goto end;} /* NOERR with retval */ +#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ +#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ +#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) + +/* token type codes, some also used as NFA arc types */ +#define EMPTY 'n' /* no token present */ +#define EOS 'e' /* end of string */ +#define PLAIN 'p' /* ordinary character */ +#define DIGIT 'd' /* digit (in bound) */ +#define BACKREF 'b' /* back reference */ +#define COLLEL 'I' /* start of [. 
*/ +#define ECLASS 'E' /* start of [= */ +#define CCLASS 'C' /* start of [: */ +#define END 'X' /* end of [. [= [: */ +#define RANGE 'R' /* - within [] which might be range delim. */ +#define LACON 'L' /* lookahead constraint subRE */ +#define AHEAD 'a' /* color-lookahead arc */ +#define BEHIND 'r' /* color-lookbehind arc */ +#define WBDRY 'w' /* word boundary constraint */ +#define NWBDRY 'W' /* non-word-boundary constraint */ +#define SBEGIN 'A' /* beginning of string (even if not BOL) */ +#define SEND 'Z' /* end of string (even if not EOL) */ +#define PREFER 'P' /* length preference */ + +/* is an arc colored, and hence on a color chain? */ +#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ + (a)->type == BEHIND) + + + +/* static function list */ +static struct fns functions = { + rfree, /* regfree insides */ +}; + + + +/* + - regfree - free an RE (actually, just overall coordination) + */ +VOID +regfree(re) +regex_t *re; +{ + if (re == NULL || re->re_magic != REMAGIC) + return; /* no way we can report it, really */ + + /* free it, calling internal routine that knows details */ + (*((struct fns *)re->re_fns)->free)(re); + + re->re_magic = 0; +} + +/* + - compile - compile regular expression + ^ int compile(regex_t *, CONST chr *, size_t, int); + */ +int +compile(re, string, len, flags) +regex_t *re; +CONST chr *string; +size_t len; +int flags; +{ + struct vars var; + struct vars *v = &var; + struct guts *g; + int i; +# define CNOERR() { if (ISERR()) return freev(v, v->err); } + + if (re == NULL) { + return REG_INVARG; + } + + /* + * Init re to known state, because we will try to free it if + * compilation fails. 
+ */ + + re->re_magic = 0; + + /* sanity checks */ + if (string == NULL || + ((flags®_EXTENDED) && (flags®_QUOTE)) || + (!(flags®_EXTENDED) && (flags®_ADVF))) { + return REG_INVARG; + } + + /* initial setup (after which freev() is callable) */ + v->re = re; + v->now = (chr *)string; + v->stop = v->now + len; + v->savenow = v->savestop = NULL; + v->err = 0; + v->cflags = flags; + v->nsubexp = 0; + v->subs = v->sub10; + v->nsubs = 10; + for (i = 0; (size_t) i < v->nsubs; i++) + v->subs[i] = NULL; + v->nfa = NULL; + v->cm = NULL; + v->nlcolor = COLORLESS; + v->wordchrs = NULL; + v->tree = NULL; + v->cv = NULL; + v->ces = NULL; + v->lacons = NULL; + v->nlacons = 0; + re->re_info = 0; /* bits get set during parse */ + re->re_guts = NULL; + re->re_fns = NULL; + + /* more complex setup, malloced things */ + v->cm = newcm(v); /* colormap must precede nfa... */ + CNOERR(); + v->nfa = newnfa(v, (struct nfa *)NULL); /* ...newnfa() uses it */ + CNOERR(); + re->re_guts = ckalloc(sizeof(struct guts)); + if (re->re_guts == NULL) + return freev(v, REG_ESPACE); + g = (struct guts *)re->re_guts; + ZAPCNFA(g->cnfa); + g->tree = NULL; + g->cm = NULL; + g->lacons = NULL; + g->nlacons = 0; + v->cv = newcvec(100, 10); + if (v->cv == NULL) + return freev(v, REG_ESPACE); + i = nces(v); + if (i > 0) { + v->ces = newcvec(nleaders(v), i); + CNOERR(); + v->ces = allces(v, v->ces); + leaders(v, v->ces); + } + CNOERR(); + + /* parsing */ + lexstart(v); /* also handles prefixes */ + if (SEE(EOS)) /* empty RE is illegal */ + return freev(v, REG_EMPTY); + v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET); + assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ + CNOERR(); + + /* finish setup of nfa and its subre tree */ + specialcolors(v->nfa); + CNOERR(); + if (flags®_PROGRESS) { + dumpnfa(v->nfa, stdout); + dumprt(v->tree, stdout, 1); + } + v->usedshorter = 0; + optrt(v, v->tree); + if (v->tree != NULL) + v->ntree = numrt(v->tree, 1); + else + v->ntree = 0; + if 
(flags®_PROGRESS) { + printf("-->\n"); + dumprt(v->tree, stdout, 1); + } + + /* build compacted NFAs for tree, lacons, main nfa */ + nfatree(v, v->tree); + if (flags®_PROGRESS) { + printf("---->\n"); + dumprt(v->tree, stdout, 1); + } + CNOERR(); + assert(v->nlacons == 0 || v->lacons != NULL); + for (i = 1; i < v->nlacons; i++) + nfanode(v, &v->lacons[i]); + CNOERR(); + optimize(v->nfa); /* removes unreachable states */ + CNOERR(); + if (v->nfa->post->nins <= 0) + return freev(v, REG_IMPOSS); /* end unreachable! */ + assert(v->nfa->pre->nouts > 0); + compact(v, v->nfa, &g->cnfa); + CNOERR(); + freenfa(v->nfa); + v->nfa = NULL; + + /* fill color map */ + fillcm(v->cm); + CNOERR(); + + /* looks okay, package it up */ + re->re_magic = REMAGIC; + re->re_nsub = v->nsubexp; + /* re_info is already set */ + re->re_csize = sizeof(chr); + re->re_guts = (VOID *)g; + re->re_fns = (VOID *)&functions; + v->re = NULL; + g->magic = GUTSMAGIC; + g->cflags = v->cflags; + g->info = re->re_info; + g->nsub = re->re_nsub; + g->cm = v->cm; + v->cm = NULL; + g->tree = v->tree; + v->tree = NULL; + g->ntree = v->ntree; + g->compare = (v->cflags®_ICASE) ? 
sncmp : wcsncmp; + g->lacons = v->lacons; + v->lacons = NULL; + g->nlacons = v->nlacons; + g->usedshorter = v->usedshorter; + + if (flags®_DUMP) + dump(re, stdout); + + assert(v->err == 0); + return freev(v, 0); +} + +/* + - moresubs - enlarge subRE vector + ^ static VOID moresubs(struct vars *, int); + */ +static VOID +moresubs(v, wanted) +struct vars *v; +int wanted; /* want enough room for this one */ +{ + struct subre **p; + size_t n; + + assert((size_t)wanted >= v->nsubs); + n = (size_t)wanted * 3 / 2 + 1; + if (v->subs == v->sub10) { + p = (struct subre **)ckalloc(n * sizeof(struct subre *)); + if (p != NULL) + memcpy((VOID *)p, (VOID *)v->subs, + v->nsubs * sizeof(struct subre *)); + } else + p = (struct subre **) ckrealloc((VOID *)v->subs, + n * sizeof(struct subre *)); + if (p == NULL) { + ERR(REG_ESPACE); + return; + } + v->subs = p; + for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) + *p = NULL; + assert(v->nsubs == n); + assert((size_t)wanted < v->nsubs); +} + +/* + - freev - free vars struct's substructures where necessary + * Does optional error-number setting, and returns error code, to make + * error code terser. + ^ static int freev(struct vars *, int); + */ +static int +freev(v, err) +struct vars *v; +int err; +{ + if (v->re != NULL) + rfree(v->re); + if (v->subs != v->sub10) + ckfree((char *)v->subs); + if (v->nfa != NULL) + freenfa(v->nfa); + if (v->cm != NULL) + freecm(v->cm); + if (v->tree != NULL) + freert(v->tree); + if (v->cv != NULL) + freecvec(v->cv); + if (v->ces != NULL) + freecvec(v->ces); + if (v->lacons != NULL) + freelacons(v->lacons, v->nlacons); + ERR(err); + + return v->err; +} + +/* + - parse - parse an RE + * Arguably this is too big and too complex and ought to be divided up. + * However, the code is somewhat intertwined... 
+ ^ static struct rtree *parse(struct vars *, int, int, struct state *, + ^ struct state *, int); + */ +static struct rtree * /* NULL if no interesting substructure */ +parse(v, stopper, type, init, final, pprefer) +struct vars *v; +int stopper; /* EOS or ')' */ +int type; /* LACON (lookahead subRE) or PLAIN */ +struct state *init; /* initial state */ +struct state *final; /* final state */ +int pprefer; /* parent's short/long preference */ +{ + struct state *left; /* scaffolding for branch */ + struct state *right; + struct state *lp; /* scaffolding for current construct */ + struct state *rp; + struct state *s; /* temporaries for new states */ + struct state *s2; +# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) + int m, n; + int emptybranch; /* is there anything in this branch yet? */ + color co; + struct rtree *branches; /* top level */ + struct rtree *branch; /* current branch */ + struct subre *now; /* current subtree's top */ + struct subre sub; /* communication variable */ + struct rtree *rt1; /* temporaries */ + struct rtree *rt2; + struct subre *t; /* work pointer, top of interesting subtree */ + int firstbranch; /* is this the first branch? */ + int capture; /* any capturing parens within this? */ + int constraint; /* is the current atom a constraint? */ + + assert(stopper == ')' || stopper == EOS); + + branch = NULL; /* lint. */ + rt1 = NULL; /* lint. */ + + capture = 0; + branches = newrt(v); + firstbranch = 1; + NOERRN(); + do { + /* a branch */ + emptybranch = 1; /* tentatively */ + left = newstate(v->nfa); + right = newstate(v->nfa); + if (!firstbranch) + rt1 = newrt(v); +#if 1 + if (ISERR()) { + freert(rt1); + freert(branches); /* mem leak (CCS). 
*/ + return NULL; + } +#else + NOERRN(); +#endif + EMPTYARC(init, left); + EMPTYARC(right, final); + lp = left; + rp = right; + if (firstbranch) + branch = branches; + else { + branch->next = rt1; + branch = rt1; + } + branch->op = '|'; + now = &branch->left; + *now = subre(left, right, NONEYET, 0, (struct rtree *)NULL); + firstbranch = 0; + NOERRN(); + + while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { + /* initial bookkeeping */ + sub.begin = NULL; /* no substructure seen yet */ + sub.subno = 0; + sub.prefer = NONEYET; + constraint = 0; + if (emptybranch) /* first of the branch */ + emptybranch = 0; + else { /* implicit concat operator */ + lp = newstate(v->nfa); + NOERRN(); + moveins(v->nfa, rp, lp); + } + assert(lp->nouts == 0); /* must string new code */ + assert(rp->nins == 0); /* between lp and rp */ + + /* an atom... */ + switch (v->nexttype) { + case '(': /* value flags as capturing or non */ + m = (type == LACON) ? 0 : v->nextvalue; + if (m) { + v->nsubexp++; + sub.subno = v->nsubexp; + if ((size_t)sub.subno >= v->nsubs) + moresubs(v, sub.subno); + assert((size_t) sub.subno < v->nsubs); + } else + sub.subno = 0; + NEXT(); + sub.begin = lp; /* NB, substructure seen */ + sub.end = rp; + /* use now->tree as temporary, so */ + /* things get freed on error returns */ + assert(now->tree == NULL); + now->tree = parse(v, ')', PLAIN, lp, rp, + now->prefer); + assert(SEE(')') || ISERR()); + NEXT(); + NOERRN(); + if (!m && now->tree == NULL) { + /* actually no relevant substructure */ + sub.begin = NULL; + } + if (now->tree != NULL) { + if (now->tree->op == '|') + sub.prefer = LONGER; + else + sub.prefer = + now->tree->left.prefer; + } + /* must postpone other processing until we */ + /* know about any {0,0} quantifier */ + break; + case BACKREF: /* the Feature From The Black Lagoon */ + INSIST(type != LACON, REG_ESUBREG); + INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); + INSIST(v->subs[v->nextvalue] != NULL, + REG_ESUBREG); + NOERRN(); + assert(v->nextvalue > 0); 
+ sub.subno = -v->nextvalue; + sub.begin = lp; /* NB, substructure seen */ + sub.end = rp; + EMPTYARC(lp, rp); /* temporarily */ + assert(now->tree == NULL); + NEXT(); + break; + case LACON: /* lookahead constraint */ + m = v->nextvalue; /* is positive? */ + NEXT(); + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERRN(); + rt1 = parse(v, ')', LACON, s, s2, NONEYET); + assert(SEE(')') || ISERR()); + NEXT(); + m = newlacon(v, s, s2, m); + freert(rt1); + NOERRN(); + ARCV(LACON, m); + constraint = 1; + break; + case PREFER: /* length preference */ + sub.prefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + sub.begin = lp; /* NB, substructure seen */ + sub.end = rp; + /* use now->tree as temporary, so */ + /* things get freed on error returns */ + assert(now->tree == NULL); + now->tree = parse(v, ')', PLAIN, lp, rp, + sub.prefer); + assert(SEE(')') || ISERR()); + NEXT(); + NOERRN(); + if (now->prefer == NONEYET) + now->prefer = sub.prefer; + if (sub.prefer == now->prefer && + now->tree == NULL) { + /* actually no relevant substructure */ + sub.begin = NULL; + } + break; + case '[': + if (v->nextvalue == 1) + bracket(v, lp, rp); + else + cbracket(v, lp, rp); + assert(SEE(']') || ISERR()); + NEXT(); + break; + case '.': + co = (color) ((v->cflags®_NLSTOP) + ? nlcolor(v) + : COLORLESS); + rainbow(v->nfa, v->cm, PLAIN, co, lp, rp); + NEXT(); + break; + case '^': + ARCV('^', 1); + if (v->cflags®_NLANCH) + ARCV(BEHIND, nlcolor(v)); + NEXT(); + constraint = 1; + break; + case '$': + ARCV('$', 1); + if (v->cflags®_NLANCH) + ARCV(AHEAD, nlcolor(v)); + NEXT(); + constraint = 1; + break; + case SBEGIN: + ARCV('^', 1); /* BOL */ + ARCV('^', 0); /* or BOS */ + NEXT(); + constraint = 1; + break; + case SEND: + ARCV('$', 1); /* EOL */ + ARCV('$', 0); /* or EOS */ + NEXT(); + constraint = 1; + break; + case '<': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERRN(); + /* needs BOL, BOS, or nonword to left... 
*/ + newarc(v->nfa, '^', 1, lp, s); + newarc(v->nfa, '^', 0, lp, s); + colorcomplement(v->nfa, v->cm, BEHIND, + v->wordchrs, lp, s); + /* ... and word to right */ + cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD); + /* (no need for special attention to \n) */ + constraint = 1; + break; + case '>': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERRN(); + /* needs word to left... */ + cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND); + /* ... and EOL, EOS, or nonword to right */ + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, + v->wordchrs, s, rp); + /* (no need for special attention to \n) */ + constraint = 1; + break; + case WBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERRN(); + /* needs BOL, BOS, or nonword to left... */ + newarc(v->nfa, '^', 1, lp, s); + newarc(v->nfa, '^', 0, lp, s); + colorcomplement(v->nfa, v->cm, BEHIND, + v->wordchrs, lp, s); + /* ... and word to right... */ + cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD); + /* ...or... */ + s = newstate(v->nfa); + NOERRN(); + /* ...needs word to left... */ + cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND); + /* ... and EOL, EOS, or nonword to right */ + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, + v->wordchrs, s, rp); + /* (no need for special attention to \n) */ + constraint = 1; + break; + case NWBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERRN(); + /* needs word to both left and right... */ + cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND); + cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD); + /* ...or... */ + s = newstate(v->nfa); + NOERRN(); + /* ...BOL, BOS, or nonword to left... */ + newarc(v->nfa, '^', 1, lp, s); + newarc(v->nfa, '^', 0, lp, s); + colorcomplement(v->nfa, v->cm, BEHIND, + v->wordchrs, lp, s); + /* ... 
and EOL, EOS, or nonword to right */ + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, + v->wordchrs, s, rp); + /* (no need for special attention to \n) */ + constraint = 1; + break; + case ')': /* unbalanced paren */ + if (!(v->cflags®_EXTENDED) || + (v->cflags®_ADVF)) { + ERR(REG_EPAREN); + goto end; + } + NOTE(REG_UPBOTCH); + /* fallthrough into case PLAIN */ + case PLAIN: + onechr(v, v->nextvalue, lp, rp); + okcolors(v->nfa, v->cm); + NOERRN(); + NEXT(); + break; + case '*': + case '+': + case '?': + case '{': + ERR(REG_BADRPT); + goto end; + default: + ERR(REG_ASSERT); + goto end; + } + + /* ...possibly followed by a quantifier */ + switch (v->nexttype) { + case '*': + m = 0; + n = INFINITY; + sub.prefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '+': + m = 1; + n = INFINITY; + sub.prefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '?': + m = 0; + n = 1; + sub.prefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '{': + NEXT(); + m = scannum(v); + if (EAT(',')) { + if (SEE(DIGIT)) + n = scannum(v); + else + n = INFINITY; + if (m > n) { + ERR(REG_BADBR); + goto end; + } + } else + n = m; + if (!SEE('}')) { /* gets errors too */ + ERR(REG_BADBR); + goto end; + } + if (m != n) + sub.prefer = (v->nextvalue) ? 
LONGER : + SHORTER; + NEXT(); + break; + default: /* no quantifier */ + m = n = 1; + constraint = 0; + break; + } + + /* constraints may not be quantified */ + if (constraint) { + ERR(REG_BADRPT); + goto end; + } + + /* annoying special case: {0,0} cancels everything */ + if (m == 0 && n == 0 && sub.begin != NULL) { + freert(now->tree); + now->tree = NULL; + sub.begin = NULL; /* no substructure */ + sub.prefer = NONEYET; + /* the repeat() below will do the rest */ + } + + /* if no substructure, aVOID hard part */ + if (now->prefer == NONEYET) + now->prefer = sub.prefer; + if (sub.begin == NULL && (sub.prefer == NONEYET || + sub.prefer == now->prefer)) { + assert(sub.subno >= 0 || (m == 0 && n == 0)); + if (!(m == 1 && n == 1)) + repeat(v, lp, rp, m, n); + continue; /* NOTE CONTINUE */ + } + + /* hard part: something messy seen */ + /* break subRE into pre, x{...}, post-to-be */ + capture = 1; /* upper levels will care */ + rt1 = newrt(v); + rt2 = newrt(v); + s = newstate(v->nfa); /* between x and post-to-be */ + NOERRN(); + moveins(v->nfa, rp, s); + EMPTYARC(s, rp); + rt1->op = ','; + rt1->left = subre(now->begin, lp, now->prefer, 0, + (struct rtree *)NULL); + assert(now->end == rp); + rt1->right = subre(lp, rp, sub.prefer, 0, rt2); + rt2->op = ','; + rt2->left = subre(lp, s, sub.prefer, 0, now->tree); + rt2->right = subre(s, rp, NONEYET, 0, + (struct rtree *)NULL); + now->tree = rt1; + now = &rt2->right; /* future elaborations here */ + t = &rt2->left; /* current activity here */ + + /* if it's a backref, time to replicate the subNFA */ + if (sub.subno < 0) { + assert(lp->nouts == 1); /* just the EMPTY */ + delsub(v->nfa, lp, s); + assert(v->subs[-sub.subno] != NULL); + dupnfa(v->nfa, v->subs[-sub.subno]->begin, + v->subs[-sub.subno]->end, lp, s); + NOERRN(); + } + + /* if no/vacuous quantifier and not backref, done */ + if (m == 1 && n == 1 && sub.subno >= 0) { + t->subno = sub.subno; + if (sub.subno > 0) + v->subs[sub.subno] = t; + continue; /* NOTE CONTINUE */ 
+ } + + /* really sticky part, quantified capturer/backref */ + /* first, turn x{0,...} into x{1,...}| */ + if (m == 0) { + s = newstate(v->nfa); + s2 = newstate(v->nfa); + rt1 = newrt(v); + rt2 = newrt(v); + NOERRN(); + moveouts(v->nfa, t->begin, s); + EMPTYARC(t->begin, s); + EMPTYARC(t->begin, s2); + EMPTYARC(s2, t->end); + rt1->op = rt2->op = '|'; + rt1->left = subre(s, t->end, sub.prefer, 0, + t->tree); + rt1->next = rt2; + rt2->left = subre(s2, t->end, sub.prefer, 0, + (struct rtree *)NULL); + t->tree = rt1; + t = &rt1->left; + m = 1; + } + + /* second, x{1,1} is just x */ + if (m == 1 && n == 1 && sub.subno >= 0) { + t->subno = sub.subno; + if (sub.subno > 0) + v->subs[sub.subno] = t; + continue; /* NOTE CONTINUE */ + } + + /* backrefs get special treatment */ + if (sub.subno < 0) { + repeat(v, t->begin, t->end, m, n); + rt1 = newrt(v); + NOERRN(); + assert(t->tree == NULL); + t->tree = rt1; + rt1->op = 'b'; + rt1->left.subno = sub.subno; + rt1->left.min = (short) m; + rt1->left.max = (short) n; + rt1->left.prefer = sub.prefer; + continue; /* NOTE CONTINUE */ + } + + /* turn x{m,n} into x{m-1,n-1}x, with capturing */ + /* parens in only second x */ + s = newstate(v->nfa); + NOERRN(); + moveouts(v->nfa, t->begin, s); + dupnfa(v->nfa, s, t->end, t->begin, s); + assert(m >= 1 && m != INFINITY && n >= 1); + repeat(v, t->begin, s, m-1, (n == INFINITY) ? 
n : n-1); + rt1 = newrt(v); + NOERRN(); + rt1->op = ','; + rt1->left = subre(t->begin, s, sub.prefer, 0, + (struct rtree *)NULL); + /* sub.prefer not really right, but doesn't matter */ + rt1->right = subre(s, t->end, sub.prefer, sub.subno, + t->tree); + if (sub.subno > 0) + v->subs[sub.subno] = &rt1->right; + t->tree = rt1; + } + if (emptybranch) { + NOTE(REG_UUNSPEC); + EMPTYARC(lp, rp); + } + } while (EAT('|')); + assert(SEE(stopper) || SEE(EOS)); + + if (!SEE(stopper)) { + assert(stopper == ')' && SEE(EOS)); + ERR(REG_EPAREN); + } + + /* higher levels care about our preference in certain situations */ + if (branch != branches) { /* >1 branch */ + if (pprefer != LONGER) + capture = 1; + } else if (branches->left.prefer != pprefer) + capture = 1; + + /* optimize out vacuous alternation */ + if (branch == branches) { + assert(branch->next == NULL && branch->right.begin == NULL); + assert(branch->left.subno == 0); + if (capture && branch->left.tree == NULL) + branch->op = ','; + else { + branches = branch->left.tree; /* might be NULL */ + freertnode(branch); + } + } + + if (capture) /* actually a catchall flag */ + return branches; + end: /* mem leak (CCS) */ + freert(branches); + return NULL; +} + +/* + - scannum - scan a number + ^ static int scannum(struct vars *); + */ +static int /* value, <= DUPMAX */ +scannum(v) +struct vars *v; +{ + int n = 0; + + while (SEE(DIGIT) && n < DUPMAX) { + n = n*10 + v->nextvalue; + NEXT(); + } + if (SEE(DIGIT) || n > DUPMAX) { + ERR(REG_BADBR); + return 0; + } + return n; +} + +/* + - repeat - replicate subNFA for quantifiers + * The duplication sequences used here are chosen carefully so that any + * pointers starting out pointing into the subexpression end up pointing into + * the last occurrence. (Note that it may not be strung between the same + * left and right end states, however!) 
This used to be important for the + * subRE tree, although the important bits are now handled by the in-line + * code in parse(), and when this is called, it doesn't matter any more. + ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); + */ +static VOID +repeat(v, lp, rp, m, n) +struct vars *v; +struct state *lp; +struct state *rp; +int m; +int n; +{ +# define SOME 2 +# define INF 3 +# define PAIR(x, y) ((x)*4 + (y)) +# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) + CONST int rm = REDUCE(m); + CONST int rn = REDUCE(n); + struct state *s; + struct state *s2; + + switch (PAIR(rm, rn)) { + case PAIR(0, 0): /* empty string */ + delsub(v->nfa, lp, rp); + EMPTYARC(lp, rp); + break; + case PAIR(0, 1): /* do as x| */ + EMPTYARC(lp, rp); + break; + case PAIR(0, SOME): /* do as x{1,n}| */ + repeat(v, lp, rp, 1, n); + NOERR(); + EMPTYARC(lp, rp); + break; + case PAIR(0, INF): /* loop x around */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s); + EMPTYARC(lp, s); + EMPTYARC(s, rp); + break; + case PAIR(1, 1): /* no action required */ + break; + case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, 1, n-1); + NOERR(); + EMPTYARC(lp, s); + break; + case PAIR(1, INF): /* add loopback arc */ + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s2); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); + EMPTYARC(s2, s); + break; + case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m-1, n-1); + break; + case PAIR(SOME, INF): /* do as x{m-1,}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m-1, n); + break; + default: + 
ERR(REG_ASSERT); + break; + } +} + +/* + - bracket - handle non-complemented bracket expression + * Also called from cbracket for complemented bracket expressions. + ^ static VOID bracket(struct vars *, struct state *, struct state *); + */ +static VOID +bracket(v, lp, rp) +struct vars *v; +struct state *lp; +struct state *rp; +{ + assert(SEE('[')); + NEXT(); + while (!SEE(']') && !SEE(EOS)) + brackpart(v, lp, rp); + assert(SEE(']') || ISERR()); + okcolors(v->nfa, v->cm); +} + +/* + - cbracket - handle complemented bracket expression + * We do it by calling bracket() with dummy endpoints, and then complementing + * the result. The alternative would be to invoke rainbow(), and then delete + * arcs as the b.e. is seen... but that gets messy. + ^ static VOID cbracket(struct vars *, struct state *, struct state *); + */ +static VOID +cbracket(v, lp, rp) +struct vars *v; +struct state *lp; +struct state *rp; +{ + struct state *left = newstate(v->nfa); + struct state *right = newstate(v->nfa); + struct state *s; + struct arc *a; /* arc from lp */ + struct arc *ba; /* arc from left, from bracket() */ + struct arc *pa; /* CE-prototype arc */ + color co; + chr *p; + int i; + + NOERR(); + bracket(v, left, right); + if (v->cflags®_NLSTOP) + newarc(v->nfa, PLAIN, nlcolor(v), left, right); + NOERR(); + + assert(lp->nouts == 0); /* all outarcs will be ours */ + + /* easy part of complementing */ + colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); + NOERR(); + if (v->ces == NULL) { /* no CEs -- we're done */ + dropstate(v->nfa, left); + assert(right->nins == 0); + freestate(v->nfa, right); + return; + } + + /* but complementing gets messy in the presence of CEs... 
*/ + NOTE(REG_ULOCALE); + for (p = v->ces->chrs, i = v->ces->nchrs; i > 0; p++, i--) { + co = getcolor(v->cm, *p); + a = findarc(lp, PLAIN, co); + ba = findarc(left, PLAIN, co); + if (ba == NULL) { + assert(a != NULL); + freearc(v->nfa, a); + } else { + assert(a == NULL); + } + s = newstate(v->nfa); + NOERR(); + newarc(v->nfa, PLAIN, co, lp, s); + NOERR(); + pa = findarc(v->cepbegin, PLAIN, co); + assert(pa != NULL); + if (ba == NULL) { /* easy case, need all of them */ + cloneouts(v->nfa, pa->to, s, rp, PLAIN); + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); + } else { /* must be selective */ + if (findarc(ba->to, '$', 1) == NULL) { + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, pa->to, + s, rp); + } + for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) + if (findarc(ba->to, PLAIN, pa->co) == NULL) + newarc(v->nfa, PLAIN, pa->co, s, rp); + if (s->nouts == 0) /* limit of selectivity: none */ + dropstate(v->nfa, s); /* frees arc too */ + } + NOERR(); + } + + delsub(v->nfa, left, right); + assert(left->nouts == 0); + freestate(v->nfa, left); + assert(right->nins == 0); + freestate(v->nfa, right); +} + +/* + - brackpart - handle one item (or range) within a bracket expression + ^ static VOID brackpart(struct vars *, struct state *, struct state *); + */ +static VOID +brackpart(v, lp, rp) +struct vars *v; +struct state *lp; +struct state *rp; +{ + celt startc; + celt endc; + struct cvec *cv; + chr *startp; + chr *endp; + chr c[1]; + + /* parse something, get rid of special cases, take shortcuts */ + switch (v->nexttype) { + case RANGE: /* a-b-c or other botch */ + ERR(REG_ERANGE); + return; + case PLAIN: + c[0] = v->nextvalue; + NEXT(); + /* shortcut for ordinary chr (not range, not CE leader) */ + if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { + onechr(v, c[0], lp, rp); + return; + } + startc = element(v, c, c+1); + NOERR(); + break; + 
case COLLEL: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + break; + case ECLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + cv = eclass(v, startc, (v->cflags®_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); + return; + case CCLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECTYPE); + NOERR(); + cv = cclass(v, startp, endp, (v->cflags®_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); + return; + default: + ERR(REG_ASSERT); + return; + } + + if (SEE(RANGE)) { + NEXT(); + switch (v->nexttype) { + case PLAIN: + case RANGE: + c[0] = v->nextvalue; + NEXT(); + endc = element(v, c, c+1); + NOERR(); + break; + case COLLEL: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + endc = element(v, startp, endp); + NOERR(); + break; + default: + ERR(REG_ERANGE); + return; + } + } else + endc = startc; + + /* + * Ranges are unportable. Actually, standard C does + * guarantee that digits are contiguous, but making + * that an exception is just too complicated. + */ + if (startc != endc) + NOTE(REG_UUNPORT); + cv = range(v, startc, endc, (v->cflags®_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); +} + +/* + - scanplain - scan PLAIN contents of [. etc. + * Certain bits of trickery in lex.c know that this code does not try + * to look past the final bracket of the [. etc. 
+ ^ static chr *scanplain(struct vars *); + */ +static chr * /* just after end of sequence */ +scanplain(v) +struct vars *v; +{ + chr *endp; + + assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); + NEXT(); + + endp = v->now; + while (SEE(PLAIN)) { + endp = v->now; + NEXT(); + } + + assert(SEE(END) || ISERR()); + NEXT(); + + return endp; +} + +/* + - leaders - process a cvec of collating elements to also include leaders + * Also gives all characters involved their own colors, which is almost + * certainly necessary, and sets up little disconnected subNFA. + ^ static VOID leaders(struct vars *, struct cvec *); + */ +static VOID +leaders(v, cv) +struct vars *v; +struct cvec *cv; +{ + int ce; + chr *p; + chr leader; + struct state *s; + struct arc *a; + + v->cepbegin = newstate(v->nfa); + v->cepend = newstate(v->nfa); + NOERR(); + + for (ce = 0; ce < cv->nces; ce++) { + p = cv->ces[ce]; + leader = *p; + if (!haschr(cv, leader)) { + addchr(cv, leader); + s = newstate(v->nfa); + newarc(v->nfa, PLAIN, subcolor(v->cm, leader), + v->cepbegin, s); + okcolors(v->nfa, v->cm); + } else { + a = findarc(v->cepbegin, PLAIN, + getcolor(v->cm, leader)); + assert(a != NULL); + s = a->to; + assert(s != v->cepend); + } + p++; + assert(*p != 0 && *(p+1) == 0); /* only 2-char CEs at present */ + newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->cepend); + okcolors(v->nfa, v->cm); + } +} + +/* + - onechr - fill in arcs for a plain character, and possible case complements + * This is mostly a shortcut for efficient handling of the common case. + ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); + */ +static VOID +onechr(v, c, lp, rp) +struct vars *v; +pchr c; +struct state *lp; +struct state *rp; +{ + if (!(v->cflags®_ICASE)) { + newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); + return; + } + + /* rats, need general case anyway... 
*/ + dovec(v, allcases(v, c), lp, rp); +} + +/* + - dovec - fill in arcs for each element of a cvec + * This one has to handle the messy cases, like CEs and CE leaders. + ^ static VOID dovec(struct vars *, struct cvec *, struct state *, + ^ struct state *); + */ +static VOID +dovec(v, cv, lp, rp) +struct vars *v; +struct cvec *cv; +struct state *lp; +struct state *rp; +{ + chr *p; + chr *np; + int i; + color co; + struct arc *a; + struct arc *pa; /* arc in prototype */ + struct state *s; + struct state *ps; /* state in prototype */ + + /* first, get the ordinary characters out of the way */ + np = cv->chrs; + for (p = np, i = cv->nchrs; i > 0; p++, i--) + if (!ISCELEADER(v, *p)) { + newarc(v->nfa, PLAIN, subcolor(v->cm, *p), lp, rp); + *p = 0; + } else { + assert(singleton(v->cm, *p)); + *np++ = *p; + } + cv->nchrs = np - cv->chrs; /* only CE leaders remain */ + if (cv->nchrs == 0 && cv->nces == 0) + return; + + /* deal with the CE leaders */ + NOTE(REG_ULOCALE); + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { + co = getcolor(v->cm, *p); + a = findarc(lp, PLAIN, co); + if (a != NULL) + s = a->to; + else { + s = newstate(v->nfa); + NOERR(); + newarc(v->nfa, PLAIN, co, lp, s); + NOERR(); + } + pa = findarc(v->cepbegin, PLAIN, co); + assert(pa != NULL); + ps = pa->to; + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); + NOERR(); + } + + /* and the CEs */ + for (i = 0; i < cv->nces; i++) { + p = cv->ces[i]; + assert(singleton(v->cm, *p)); + co = getcolor(v->cm, *p++); + a = findarc(lp, PLAIN, co); + if (a != NULL) + s = a->to; + else { + s = newstate(v->nfa); + NOERR(); + newarc(v->nfa, PLAIN, co, lp, s); + NOERR(); + } + assert(*p != 0); /* at least two chars */ + assert(singleton(v->cm, *p)); + co = getcolor(v->cm, *p++); + assert(*p == 0); /* and only two, for now */ + newarc(v->nfa, PLAIN, co, s, rp); + NOERR(); + } +} + +/* + - nlcolor - assign newline a unique color, if it doesn't have one 
already + * Restriction: can't be called when there are subcolors open. (Maybe + * this should be enforced...) + ^ static color nlcolor(struct vars *); + */ +static color +nlcolor(v) +struct vars *v; +{ + if (v->nlcolor == COLORLESS) { + v->nlcolor = subcolor(v->cm, newline()); + okcolors(v->nfa, v->cm); + } + return v->nlcolor; +} + +/* + - wordchrs - set up word-chr list for word-boundary stuff, if needed + * The list is kept as a bunch of arcs between two dummy states; it's + * disposed of by the unreachable-states sweep in NFA optimization. + * Does NEXT(). Must not be called from any unusual lexical context. + * This should be reconciled with the \w etc. handling in lex.c, and + * should be cleaned up to reduce dependencies on input scanning. + ^ static VOID wordchrs(struct vars *); + */ +static VOID +wordchrs(v) +struct vars *v; +{ + struct state *left; + struct state *right; + + if (v->wordchrs != NULL) { + NEXT(); /* for consistency */ + return; + } + + left = newstate(v->nfa); + right = newstate(v->nfa); + NOERR(); + lexword(v); + NEXT(); + assert(v->savenow != NULL && SEE('[')); + bracket(v, left, right); + assert(((v->savenow != NULL) && SEE(']')) || ISERR()); + NEXT(); + NOERR(); + v->wordchrs = left; +} + +/* + - subre - construct a subre struct + ^ static struct subre subre(struct state *, struct state *, int, int, + ^ struct rtree *); + */ +static struct subre +subre(begin, end, prefer, subno, tree) +struct state *begin; +struct state *end; +int prefer; +int subno; +struct rtree *tree; +{ + struct subre ret; + + ret.begin = begin; + ret.end = end; + ret.prefer = prefer; + ret.subno = subno; + ret.min = ret.max = 1; + ret.tree = tree; + ZAPCNFA(ret.cnfa); + return ret; +} + +/* + - newrt - allocate subRE-tree node + ^ static struct rtree *newrt(struct vars *); + */ +static struct rtree * +newrt(v) +struct vars *v; +{ + struct rtree *rt = (struct rtree *)ckalloc(sizeof(struct rtree)); + + if (rt == NULL) { + ERR(REG_ESPACE); + return NULL; + } + + 
rt->op = '?';		/* invalid */
+	rt->no = 0;
+	rt->left.begin = NULL;
+	rt->left.end = NULL;
+	rt->left.prefer = NONEYET;
+	rt->left.subno = 0;
+	rt->left.min = rt->left.max = 1;
+	rt->left.tree = NULL;
+	ZAPCNFA(rt->left.cnfa);
+	rt->right.begin = NULL;
+	rt->right.end = NULL;
+	rt->right.prefer = NONEYET;
+	rt->right.subno = 0;
+	rt->right.min = rt->right.max = 1;
+	rt->right.tree = NULL;
+	ZAPCNFA(rt->right.cnfa);
+	rt->next = NULL;
+	return rt;
+}
+
+/*
+ - freert - free a subRE subtree
+ * Recursively frees whatever hangs off rt -- the trees under its left and
+ * right subres and the chain reached through next -- and then the node
+ * itself.  A NULL rt is accepted and ignored.
+ ^ static VOID freert(struct rtree *);
+ */
+static VOID
+freert(rt)
+struct rtree *rt;
+{
+	if (rt == NULL)
+		return;
+
+	if (rt->left.tree != NULL)
+		freert(rt->left.tree);
+	if (rt->right.tree != NULL)
+		freert(rt->right.tree);
+	if (rt->next != NULL)
+		freert(rt->next);
+
+	freertnode(rt);
+}
+
+/*
+ - freertnode - free one node in a subRE subtree
+ * Releases the node's left and right compacted NFAs, where present, and
+ * then the node's own storage.  Subtrees and the next chain are not
+ * touched.  A NULL rt is accepted and ignored.
+ ^ static VOID freertnode(struct rtree *);
+ */
+static VOID
+freertnode(rt)
+struct rtree *rt;
+{
+	if (rt == NULL)
+		return;
+
+	if (!NULLCNFA(rt->left.cnfa))
+		freecnfa(&rt->left.cnfa, 0);
+	if (!NULLCNFA(rt->right.cnfa))
+		freecnfa(&rt->right.cnfa, 0);
+
+	ckfree((char *)rt);
+}
+
+/*
+ - optrt - optimize a subRE subtree
+ * Works on one node and then on its chain of alternatives:
+ *  - a child tree that, once optimized itself, has nothing but a left
+ *    subre is folded into the parent subre, provided preferences are
+ *    compatible and at most one of the two carries a subexpression number;
+ *  - subres that isempty() reports empty get end set equal to begin;
+ *  - a vacuous left is replaced by the right, and a vacuous right cleared;
+ *  - a preference still NONEYET becomes LONGER, and any use of SHORTER is
+ *    recorded in v->usedshorter.
+ * A NULL rt is accepted and ignored.  Must not be handed a 'b' node.
+ ^ static VOID optrt(struct vars *, struct rtree *);
+ */
+static VOID
+optrt(v, rt)
+struct vars *v;
+struct rtree *rt;
+{
+	struct rtree *t;
+	int subno;
+
+	if (rt == NULL)
+		return;
+	assert(rt->op != 'b');
+
+	/* pull up subtrees if possible */
+	if (rt->left.begin != NULL && rt->left.tree != NULL &&
+						rt->left.tree->op != 'b') {
+		t = rt->left.tree;
+		optrt(v, t);
+		if (t->right.begin == NULL && t->next == NULL &&
+				(rt->left.prefer == NONEYET ||
+				t->left.prefer == rt->left.prefer) &&
+				(rt->left.subno == 0 || t->left.subno == 0)) {
+			subno = rt->left.subno;
+			rt->left = t->left;
+			assert(NULLCNFA(t->left.cnfa));
+			freertnode(t);
+			if (subno != 0) {
+				assert(rt->left.subno == 0 && subno > 0);
+				rt->left.subno = subno;
+			}
+		}
+	}
+	if (rt->right.begin != NULL && rt->right.tree != NULL &&
+						rt->right.tree->op != 'b') {
+		t = rt->right.tree;
+		optrt(v, t);
+		/*
+		 * What gets pulled up is t->left (t->right is empty here), so
+		 * it is t->left's preference, subexpression number, and cnfa
+		 * that must be vetted -- t->right's fields may be stale.
+		 */
+		if (t->right.begin == NULL && t->next == NULL &&
+				(rt->right.prefer == NONEYET ||
+				t->left.prefer == rt->right.prefer) &&
+				(rt->right.subno == 0 || t->left.subno == 0)) {
+			subno = rt->right.subno;
+			rt->right = t->left;
+			assert(NULLCNFA(t->left.cnfa));
+			freertnode(t);
+			if (subno != 0) {
+				assert(rt->right.subno == 0 && subno > 0);
+				rt->right.subno = subno;
+			}
+		}
+	}
+
+	/* simplify empties */
+	if (rt->left.begin != NULL && isempty(rt->left.begin, rt->left.end))
+		rt->left.end = rt->left.begin;
+	if (rt->right.begin != NULL && isempty(rt->right.begin, rt->right.end))
+		rt->right.end = rt->right.begin;
+
+	/* if left subtree vacuous and right non-empty, move right over */
+	if (rt->left.begin != NULL && rt->left.begin == rt->left.end &&
+			rt->left.subno == 0 && rt->left.tree == NULL &&
+			rt->right.begin != NULL) {
+		rt->left = rt->right;
+		rt->right.begin = NULL;
+		rt->right.tree = NULL;
+	}
+
+	/* if right subtree vacuous, clear it out */
+	if (rt->right.begin != NULL && rt->right.begin == rt->right.end &&
+			rt->right.subno == 0 && rt->right.tree == NULL) {
+		rt->right.begin = NULL;
+		rt->right.tree = NULL;
+	}
+
+	/* preference cleanup and analysis */
+	if (rt->left.prefer == NONEYET)
+		rt->left.prefer = LONGER;
+	if (rt->left.prefer == SHORTER)
+		v->usedshorter = 1;
+	if (rt->right.begin != NULL) {
+		if (rt->right.prefer == NONEYET)
+			rt->right.prefer = LONGER;
+		if (rt->right.prefer == SHORTER)
+			v->usedshorter = 1;
+	}
+
+	/* recurse through alternatives */
+	if (rt->next != NULL)
+		optrt(v, rt->next);
+}
+
+/*
+ - numrt - number tree nodes
+ ^ static int numrt(struct rtree *, int);
+ */
+static int			/* next number */
+numrt(rt, start)
+struct rtree *rt;
+int start;			/* starting point for subtree numbers */
+{
+	int i;
+
+	assert(rt != NULL);
+
+	i = start;
+	rt->no = (short) i++;
+	if (rt->left.tree != NULL)
+		i = numrt(rt->left.tree, i);
+	if (rt->right.tree != NULL)
+		i = 
numrt(rt->right.tree, i);
+	if (rt->next != NULL)
+		i = numrt(rt->next, i);
+	return i;
+}
+
+/*
+ - nfatree - turn a subRE subtree into a tree of compacted NFAs
+ * Visits the node's left and right subres, the trees beneath them, and
+ * then the chain of alternatives, calling nfanode() on every subre that
+ * has a begin state.  A NULL rt is accepted and ignored.
+ ^ static VOID nfatree(struct vars *, struct rtree *);
+ */
+static VOID
+nfatree(v, rt)
+struct vars *v;
+struct rtree *rt;
+{
+	if (rt == NULL)
+		return;
+
+	if (rt->left.begin != NULL)
+		nfanode(v, &rt->left);
+	if (rt->left.tree != NULL)
+		nfatree(v, rt->left.tree);
+
+	if (rt->right.begin != NULL)
+		nfanode(v, &rt->right);
+	if (rt->right.tree != NULL)
+		nfatree(v, rt->right.tree);
+
+	if (rt->next != NULL)
+		nfatree(v, rt->next);
+}
+
+/*
+ - nfanode - do one NFA for nfatree
+ * Copies the states between sub->begin and sub->end into a scratch NFA,
+ * runs specialcolors() and optimize() on it, compacts the result into
+ * sub->cnfa, and frees the scratch NFA.  Later steps are skipped once an
+ * error has been noted; a subre with no begin state is left alone.
+ ^ static VOID nfanode(struct vars *, struct subre *);
+ */
+static VOID
+nfanode(v, sub)
+struct vars *v;
+struct subre *sub;
+{
+	struct nfa *nfa;
+
+	if (sub->begin == NULL)
+		return;
+
+	nfa = newnfa(v, v->nfa);
+	NOERR();
+	dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final);
+	if (!ISERR()) {
+		specialcolors(nfa);
+		optimize(nfa);
+	}
+	if (!ISERR())
+		compact(v, nfa, &sub->cnfa);
+	freenfa(nfa);		/* scratch copy goes away, error or not */
+}
+
+/*
+ - newlacon - allocate a lookahead-constraint subRE
+ * Grows v->lacons by one entry (slot 0 is never handed out, so the first
+ * call allocates two) and fills the new entry with the given begin and
+ * end states, with pos stored in its subno field.
+ * Returns the new entry's index.  On allocation failure, notes REG_ESPACE
+ * and returns 0, leaving v->lacons and v->nlacons exactly as they were so
+ * that the existing vector can still be used and freed.
+ ^ static int newlacon(struct vars *, struct state *, struct state *, int);
+ */
+static int			/* lacon number */
+newlacon(v, begin, end, pos)
+struct vars *v;
+struct state *begin;
+struct state *end;
+int pos;
+{
+	int n;
+	struct subre *sub;
+	struct subre *vec;	/* candidate replacement for v->lacons */
+
+	if (v->nlacons == 0) {
+		vec = (struct subre *)ckalloc(2 * sizeof(struct subre));
+		n = 1;		/* skip 0th */
+	} else {
+		vec = (struct subre *)ckrealloc((VOID *) v->lacons,
+					(v->nlacons+1)*sizeof(struct subre));
+		n = v->nlacons;
+	}
+	if (vec == NULL) {
+		ERR(REG_ESPACE);
+		return 0;
+	}
+
+	/* commit only after the allocation is known to have worked */
+	v->lacons = vec;
+	v->nlacons = n + 1;
+
+	sub = &v->lacons[n];
+	sub->begin = begin;
+	sub->end = end;
+	sub->subno = pos;
+	ZAPCNFA(sub->cnfa);
+	return n;
+}
+
+/*
+ - freelacons - free lookahead-constraint subRE vector
+ ^ static VOID freelacons(struct subre *, int);
+ */
+static VOID
+freelacons(subs, n)
+struct subre *subs;
+int n;
+{ + struct subre *sub; + int i; + + for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) + if (!NULLCNFA(sub->cnfa)) + freecnfa(&sub->cnfa, 0); + ckfree((char *)subs); +} + +/* + - rfree - free a whole RE (insides of regfree) + ^ static VOID rfree(regex_t *); + */ +static VOID +rfree(re) +regex_t *re; /* regfree has validated it */ +{ + struct guts *g = (struct guts *)re->re_guts; + + re->re_magic = 0; /* invalidate it */ + re->re_guts = NULL; + re->re_fns = NULL; + g->magic = 0; + if (!NULLCNFA(g->cnfa)) + freecnfa(&g->cnfa, 0); + if (g->cm != NULL) + freecm(g->cm); + if (g->tree != NULL) + freert(g->tree); + if (g->lacons != NULL) + freelacons(g->lacons, g->nlacons); + ckfree((char *)g); +} + +/* + - dumprt - dump a subRE tree + ^ static VOID dumprt(struct rtree *, FILE *, int); + */ +static VOID +dumprt(rt, f, nfapresent) +struct rtree *rt; +FILE *f; +int nfapresent; /* is the original NFA still around? */ +{ + if (rt == NULL) + fprintf(f, "null tree\n"); + else + rtdump(rt, f, nfapresent, 0); + fflush(f); +} + +/* + - rtdump - recursive guts of dumprt + ^ static VOID rtdump(struct rtree *, FILE *, int, int); + */ +static VOID +rtdump(rt, f, nfapresent, level) +struct rtree *rt; +FILE *f; +int nfapresent; /* is the original NFA still around? */ +int level; +{ + int i; +# define RTSEP " " + + for (i = 0; i < level; i++) + fprintf(f, RTSEP); + fprintf(f, "%c (n%d) {\n", rt->op, rt->no); + if (rt->left.begin != NULL) { + for (i = 0; i < level+1; i++) + fprintf(f, RTSEP); + fprintf(f, "L"); + fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" : + ((rt->left.prefer == LONGER) ? 
">" : "<")); + if (nfapresent) + fprintf(f, "%ld-%ld", (long)rt->left.begin->no, + (long)rt->left.end->no); + if (rt->left.subno > 0) + fprintf(f, " (%d)", rt->left.subno); + else if (rt->left.subno < 0) { + fprintf(f, " \\%d", -rt->left.subno); + if (rt->left.min != 1 || rt->left.max != 1) { + fprintf(f, "{%d-", (int)rt->left.min); + if (rt->left.max != INFINITY) + fprintf(f, "%d", (int)rt->left.max); + fprintf(f, "}"); + } + if (rt->left.tree != NULL) + fprintf(f, "(nonNULL tree!!)"); + } + if (rt->left.tree != NULL || !NULLCNFA(rt->left.cnfa)) + fprintf(f, ":"); + fprintf(f, "\n"); + if (!NULLCNFA(rt->left.cnfa)) + dumpcnfa(&rt->left.cnfa, f); + if (rt->left.tree != NULL) + rtdump(rt->left.tree, f, nfapresent, level+1); + } else if (rt->op == 'b') { + for (i = 0; i < level+1; i++) + fprintf(f, RTSEP); + fprintf(f, "L"); + fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" : + ((rt->left.prefer == LONGER) ? ">" : "<")); + assert(rt->left.subno < 0); + fprintf(f, " \\%d", -rt->left.subno); + if (rt->left.min != 1 || rt->left.max != 1) { + fprintf(f, "{%d-", (int)rt->left.min); + if (rt->left.max != INFINITY) + fprintf(f, "%d", (int)rt->left.max); + fprintf(f, "}"); + } + if (rt->left.tree != NULL) + fprintf(f, "(nonNULL tree!!)"); + fprintf(f, "\n"); + } + + if (rt->right.begin != NULL) { + if (rt->op != ',') + fprintf(f, "op %c has non-NULL right tree\n", rt->op); + for (i = 0; i < level+1; i++) + fprintf(f, RTSEP); + fprintf(f, "R"); + fprintf(f, "%s", (rt->right.prefer == NONEYET) ? "-" : + ((rt->right.prefer == LONGER) ? 
">" : "<")); + if (nfapresent) + fprintf(f, "%ld-%ld", (long)rt->right.begin->no, + (long)rt->right.end->no); + if (rt->right.subno > 0) + fprintf(f, " (%d)", rt->right.subno); + else if (rt->right.subno < 0) { + fprintf(f, " \\%d", -rt->right.subno); + if (rt->right.min != 1 || rt->right.max != 1) { + fprintf(f, "{%d-", (int)rt->right.min); + if (rt->right.max != INFINITY) + fprintf(f, "%d", (int)rt->right.max); + fprintf(f, "}"); + } + if (rt->right.tree != NULL) + fprintf(f, "(nonNULL tree!!)"); + } + if (rt->right.tree != NULL || !NULLCNFA(rt->right.cnfa)) + fprintf(f, ":"); + fprintf(f, "\n"); + if (!NULLCNFA(rt->right.cnfa)) + dumpcnfa(&rt->right.cnfa, f); + if (rt->right.tree != NULL) + rtdump(rt->right.tree, f, nfapresent, level+1); + } + for (i = 0; i < level; i++) + fprintf(f, RTSEP); + fprintf(f, "}\n"); + + if (rt->next != NULL) { + if (rt->op != '|') + fprintf(f, "op %c has non-NULL next\n", rt->op); + if (rt->next->op != rt->op) + fprintf(f, "next op %c, expecting %c\n", rt->next->op, + rt->op); + rtdump(rt->next, f, nfapresent, level); + } +} + +/* + - dump - dump an RE in human-readable form + ^ static VOID dump(regex_t *, FILE *); + */ +static VOID +dump(re, f) +regex_t *re; +FILE *f; +{ +} + +#undef NOERRN +#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ + +#define COMPILE 1 +#include "lex.c" +#include "color.c" +#include "locale.c" +#include "nfa.c" diff --git a/generic/exec.c b/generic/exec.c new file mode 100644 index 0000000..5c21701 --- /dev/null +++ b/generic/exec.c @@ -0,0 +1,1753 @@ +/* + * exec.c -- + * + * Regexp package file: re_*exec and friends - match REs + * + * Copyright (c) 1998 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., and Sun Microsystems Inc., none of + * whom are responsible for the results. The author thanks all of them. 
+ * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (c) 1998 by Sun Microsystems, Inc. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * SCCS: @(#) exec.c 1.10 98/01/21 14:32:57 + */ + +#include "tclInt.h" +#include +#include "tclRegexp.h" +#include "chr.h" +#include "guts.h" + + +/* internal variables, bundled for easy passing around */ +struct vars { + regex_t *re; + struct guts *g; + int eflags; /* copies of arguments */ + size_t nmatch; + regmatch_t *pmatch; + chr *start; /* start of string */ + chr *stop; /* just past end of string */ + int err; /* error code if any (0 none) */ + regoff_t *mem; /* memory vector for backtracking */ + regoff_t *mem1; /* localizer vector */ + regoff_t *mem2; /* dissector vector */ +}; +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) (((vv)->err) ? 
(vv)->err : ((vv)->err = (e))) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return;} /* if error seen, return */ +#define OFF(p) ((p) - v->start) + + + +/* lazy-DFA representation */ +struct arcp { /* "pointer" to an outarc */ + struct sset *ss; + color co; +}; + +struct sset { /* state set */ + unsigned *states; /* pointer to bitvector */ + unsigned hash; /* xor of bitvector */ + int flags; +# define STARTER 01 /* the initial state set */ +# define POSTSTATE 02 /* includes the goal state */ + struct arcp ins; /* chain of inarcs pointing here */ + chr *lastseen; /* last entered on arrival here */ + struct sset **outs; /* outarc vector indexed by color */ + struct arcp *inchain; /* chain-pointer vector for outarcs */ +}; + +struct dfa { + int nssets; /* size of cache */ + int nssused; /* how many entries occupied yet */ + int nstates; /* number of states */ + int ncolors; /* length of outarc and inchain vectors */ + int wordsper; /* length of state-set bitvectors */ + struct sset *ssets; /* state-set cache */ + unsigned *statesarea; /* bitvector storage */ + unsigned *work; /* pointer to work area within statesarea */ + struct sset **outsarea; /* outarc-vector storage */ + struct arcp *incarea; /* inchain storage */ + struct cnfa *cnfa; + struct colormap *cm; + chr *lastpost; /* location of last cache-flushed success */ +}; + +#define CACHE 200 +#define WORK 1 /* number of work bitvectors needed */ + + + +/* + * forward declarations + */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ +/* === exec.c === */ +int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int)); +static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static VOID zapmatches _ANSI_ARGS_((regmatch_t *, size_t)); +static VOID zapmem _ANSI_ARGS_((struct vars *, struct rtree *)); 
+static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int dissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static int altdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static int cdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static int crevdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static int csindissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static int cbrdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static int caltdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static chr *dismatch _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static chr *dismrev _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static chr *dismsin _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); +static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); +static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *)); +static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static VOID freedfa _ANSI_ARGS_((struct dfa *)); +static unsigned hash _ANSI_ARGS_((unsigned *, int)); +static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); +static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *)); +static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); +static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *)); +static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *)); +/* === color.c === */ +union tree; +static color getcolor _ANSI_ARGS_((struct colormap *, pchr)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + + + +/* + - exec - match regular expression + ^ int exec(regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int); + */ 
+int +exec(re, string, len, nmatch, pmatch, flags) +regex_t *re; +CONST chr *string; +size_t len; +size_t nmatch; +regmatch_t pmatch[]; +int flags; +{ + struct vars var; + register struct vars *v = &var; + int st; + size_t n; + int complications; + + /* sanity checks */ + if (re == NULL || string == NULL || re->re_magic != REMAGIC) + return REG_INVARG; + if (re->re_csize != sizeof(chr)) + return REG_MIXED; + + /* setup */ + v->re = re; + v->g = (struct guts *)re->re_guts; + complications = (v->g->info®_UBACKREF) ? 1 : 0; + if (v->g->usedshorter) + complications = 1; + v->eflags = flags; + if (v->g->cflags®_NOSUB) + nmatch = 0; /* override client */ + v->nmatch = nmatch; + if (complications && v->nmatch < (size_t)(v->g->nsub + 1)) { + /* need work area bigger than what user gave us */ + v->pmatch = (regmatch_t *)ckalloc((v->g->nsub + 1) * + sizeof(regmatch_t)); + if (v->pmatch == NULL) + return REG_ESPACE; + v->nmatch = v->g->nsub + 1; + } else + v->pmatch = pmatch; + v->start = (chr *)string; + v->stop = (chr *)string + len; + v->err = 0; + if (complications) { + v->mem1 = (regoff_t *)ckalloc(2*v->g->ntree*sizeof(regoff_t)); + if (v->mem1 == NULL) { + if (v->pmatch != pmatch) + ckfree((char *)v->pmatch); + return REG_ESPACE; + } + v->mem2 = v->mem1 + v->g->ntree; + } else + v->mem1 = NULL; + + /* do it */ + if (complications) + st = cfind(v, &v->g->cnfa, v->g->cm); + else + st = find(v, &v->g->cnfa, v->g->cm); + if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { + zapmatches(pmatch, nmatch); + n = (nmatch < v->nmatch) ? 
nmatch : v->nmatch; + memcpy((VOID *)pmatch, (VOID *)v->pmatch, n*sizeof(regmatch_t)); + } + if (v->pmatch != pmatch) + ckfree((char *)v->pmatch); + if (v->mem1 != NULL) + ckfree((char *)v->mem1); + return st; +} + +/* + - find - find a match for the main NFA (no-complications case) + ^ static int find(struct vars *, struct cnfa *, struct colormap *); + */ +static int +find(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; +{ + struct dfa *d = newdfa(v, cnfa, cm); + chr *begin; + chr *end; + chr *stop = (cnfa->leftanch) ? v->start : v->stop; + + if (d == NULL) + return v->err; + + for (begin = v->start; begin <= stop; begin++) { + if (v->eflags®_MTRACE) + printf("\ntrying at %ld\n", (long)OFF(begin)); + end = longest(v, d, begin, v->stop); + if (end != NULL) { + if (v->nmatch > 0) { + v->pmatch[0].rm_so = OFF(begin); + v->pmatch[0].rm_eo = OFF(end); + } + freedfa(d); + if (v->nmatch > 1) { + zapmatches(v->pmatch, v->nmatch); + return dissect(v, v->g->tree, begin, end); + } + return REG_OKAY; + } + } + + freedfa(d); + return REG_NOMATCH; +} + +/* + - cfind - find a match for the main NFA (with complications) + ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); + */ +static int +cfind(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; +{ + struct dfa *d = newdfa(v, cnfa, cm); + chr *begin; + chr *end; + chr *stop = (cnfa->leftanch) ? v->start : v->stop; + chr *estop; + int er; + int usedis = (v->g->tree == NULL || v->g->tree->op == '|') ? 
0 : 1; + + if (d == NULL) + return v->err; + + if (!v->g->usedshorter) + usedis = 0; + for (begin = v->start; begin <= stop; begin++) { + if (v->eflags®_MTRACE) + printf("\ntrying at %ld\n", (long)OFF(begin)); + if (usedis) { + v->mem = v->mem1; + zapmem(v, v->g->tree); + } + estop = v->stop; + for (;;) { + if (usedis) { + v->mem = v->mem1; + end = dismatch(v, v->g->tree, begin, v->stop); + } else + end = longest(v, d, begin, estop); + if (end == NULL) + break; /* NOTE BREAK OUT */ + if (v->eflags®_MTRACE) + printf("tentative end %ld\n", (long)OFF(end)); + zapmatches(v->pmatch, v->nmatch); + v->mem = v->mem2; + zapmem(v, v->g->tree); + er = cdissect(v, v->g->tree, begin, end); + switch (er) { + case REG_OKAY: + if (v->nmatch > 0) { + v->pmatch[0].rm_so = OFF(begin); + v->pmatch[0].rm_eo = OFF(end); + } + freedfa(d); + return REG_OKAY; + case REG_NOMATCH: + /* go around and try again */ + if (!usedis) { + if (end == begin) { + /* no point in trying again */ + freedfa(d); + return REG_NOMATCH; + } + estop = end - 1; + } + break; + default: + freedfa(d); + return er; + } + } + } + + freedfa(d); + return REG_NOMATCH; +} + +/* + - zapmatches - initialize the subexpression matches to "no match" + ^ static VOID zapmatches(regmatch_t *, size_t); + */ +static VOID +zapmatches(p, n) +regmatch_t *p; +size_t n; +{ + size_t i; + + for (i = 1; i < n; i++) { + p[i].rm_so = -1; + p[i].rm_eo = -1; + } +} + +/* + - zapmem - initialize the retry memory of a subtree to zeros + ^ static VOID zapmem(struct vars *, struct rtree *); + */ +static VOID +zapmem(v, rt) +struct vars *v; +struct rtree *rt; +{ + if (rt == NULL) + return; + + assert(v->mem != NULL); + v->mem[rt->no] = 0; + + if (rt->left.tree != NULL) + zapmem(v, rt->left.tree); + if (rt->left.subno > 0) { + v->pmatch[rt->left.subno].rm_so = -1; + v->pmatch[rt->left.subno].rm_eo = -1; + } + if (rt->right.tree != NULL) + zapmem(v, rt->right.tree); + if (rt->right.subno > 0) { + v->pmatch[rt->right.subno].rm_so = -1; + 
v->pmatch[rt->right.subno].rm_eo = -1; + } + if (rt->next != NULL) + zapmem(v, rt->next); +} + +/* + - subset - set any subexpression relevant to a successful subre + ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); + */ +static VOID +subset(v, sub, begin, end) +struct vars *v; +struct subre *sub; +chr *begin; +chr *end; +{ + int n = sub->subno; + + if (n == 0) + return; + assert(n > 0); + if ((size_t)n >= v->nmatch) + return; + + if (v->eflags®_MTRACE) + printf("setting %d\n", n); + v->pmatch[n].rm_so = OFF(begin); + v->pmatch[n].rm_eo = OFF(end); +} + +/* + - dissect - determine subexpression matches (uncomplicated case) + ^ static int dissect(struct vars *, struct rtree *, chr *, chr *); + */ +static int /* regexec return code */ +dissect(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + struct dfa *d2; + chr *mid; + int i; + + if (rt == NULL) + return REG_OKAY; + if (v->eflags®_MTRACE) + printf("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + + /* alternatives -- punt to auxiliary */ + if (rt->op == '|') + return altdissect(v, rt, begin, end); + + /* concatenation -- need to split the substring between parts */ + assert(rt->op == ','); + assert(rt->left.cnfa.nstates > 0); + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return v->err; + + /* in some cases, there may be no right side... 
*/ + if (rt->right.cnfa.nstates == 0) { + if (v->eflags®_MTRACE) + printf("singleton\n"); + if (longest(v, d, begin, end) != end) { + freedfa(d); + return REG_ASSERT; + } + freedfa(d); + assert(rt->left.subno >= 0); + subset(v, &rt->left, begin, end); + return dissect(v, rt->left.tree, begin, end); + } + + /* general case */ + assert(rt->right.cnfa.nstates > 0); + d2 = newdfa(v, &rt->right.cnfa, v->g->cm); + if (ISERR()) { + freedfa(d); + return v->err; + } + + /* pick a tentative midpoint */ + mid = longest(v, d, begin, end); + if (mid == NULL) { + freedfa(d); + freedfa(d2); + return REG_ASSERT; + } + if (v->eflags®_MTRACE) + printf("tentative midpoint %ld\n", (long)OFF(mid)); + + /* iterate until satisfaction or failure */ + while (longest(v, d2, mid, end) != end) { + /* that midpoint didn't work, find a new one */ + if (mid == begin) { + /* all possibilities exhausted! */ + if (v->eflags®_MTRACE) + printf("no midpoint!\n"); + freedfa(d); + freedfa(d2); + return REG_ASSERT; + } + mid = longest(v, d, begin, mid-1); + if (mid == NULL) { + /* failed to find a new one! 
*/ + if (v->eflags®_MTRACE) + printf("failed midpoint!\n"); + freedfa(d); + freedfa(d2); + return REG_ASSERT; + } + if (v->eflags®_MTRACE) + printf("new midpoint %ld\n", (long)OFF(mid)); + } + + /* satisfaction */ + if (v->eflags®_MTRACE) + printf("successful\n"); + freedfa(d); + freedfa(d2); + assert(rt->left.subno >= 0); + subset(v, &rt->left, begin, mid); + assert(rt->right.subno >= 0); + subset(v, &rt->right, mid, end); + i = dissect(v, rt->left.tree, begin, mid); + if (i != REG_OKAY) + return i; + return dissect(v, rt->right.tree, mid, end); +} + +/* + - altdissect - determine alternative subexpression matches (uncomplicated) + ^ static int altdissect(struct vars *, struct rtree *, chr *, chr *); + */ +static int /* regexec return code */ +altdissect(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + int i; + + assert(rt != NULL); + assert(rt->op == '|'); + + for (i = 0; rt != NULL; rt = rt->next, i++) { + if (v->eflags®_MTRACE) + printf("trying %dth\n", i); + assert(rt->left.begin != NULL); + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return v->err; + if (longest(v, d, begin, end) == end) { + if (v->eflags®_MTRACE) + printf("success\n"); + freedfa(d); + assert(rt->left.subno >= 0); + subset(v, &rt->left, begin, end); + return dissect(v, rt->left.tree, begin, end); + } + freedfa(d); + } + return REG_ASSERT; /* none of them matched?!? */ +} + +/* + - cdissect - determine subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, + * plus 1 so that 0 uniquely means "clean slate". 
+ ^ static int cdissect(struct vars *, struct rtree *, chr *, chr *); + */ +static int /* regexec return code */ +cdissect(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + struct dfa *d2; + chr *mid; + int er; + + if (rt == NULL) + return REG_OKAY; + if (v->eflags®_MTRACE) + printf("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + + /* punt various cases to auxiliaries */ + if (rt->op == '|') /* alternatives */ + return caltdissect(v, rt, begin, end); + if (rt->op == 'b') /* backref */ + return cbrdissect(v, rt, begin, end); + if (rt->right.cnfa.nstates == 0) /* no RHS */ + return csindissect(v, rt, begin, end); + if (rt->left.prefer == SHORTER) /* reverse scan */ + return crevdissect(v, rt, begin, end); + + /* concatenation -- need to split the substring between parts */ + assert(rt->op == ','); + assert(rt->left.cnfa.nstates > 0); + assert(rt->right.cnfa.nstates > 0); + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return v->err; + d2 = newdfa(v, &rt->right.cnfa, v->g->cm); + if (ISERR()) { + freedfa(d); + return v->err; + } + if (v->eflags®_MTRACE) + printf("cconcat %d\n", rt->no); + + /* pick a tentative midpoint */ + if (v->mem[rt->no] == 0) { + mid = longest(v, d, begin, end); + if (mid == NULL) { + freedfa(d); + freedfa(d2); + return REG_NOMATCH; + } + if (v->eflags®_MTRACE) + printf("tentative midpoint %ld\n", (long)OFF(mid)); + subset(v, &rt->left, begin, mid); + v->mem[rt->no] = (mid - begin) + 1; + } else { + mid = begin + (v->mem[rt->no] - 1); + if (v->eflags®_MTRACE) + printf("working midpoint %ld\n", (long)OFF(mid)); + } + + /* iterate until satisfaction or failure */ + for (;;) { + /* try this midpoint on for size */ + er = cdissect(v, rt->left.tree, begin, mid); + if (er == REG_OKAY && longest(v, d2, mid, end) == end && + (er = cdissect(v, rt->right.tree, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != 
REG_OKAY && er != REG_NOMATCH) { + freedfa(d); + freedfa(d2); + return er; + } + + /* that midpoint didn't work, find a new one */ + if (mid == begin) { + /* all possibilities exhausted */ + if (v->eflags®_MTRACE) + printf("%d no midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return REG_NOMATCH; + } + mid = longest(v, d, begin, mid-1); + if (mid == NULL) { + /* failed to find a new one */ + if (v->eflags®_MTRACE) + printf("%d failed midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return REG_NOMATCH; + } + if (v->eflags®_MTRACE) + printf("%d: new midpoint %ld\n", rt->no, + (long)OFF(mid)); + subset(v, &rt->left, begin, mid); + v->mem[rt->no] = (mid - begin) + 1; + zapmem(v, rt->left.tree); + zapmem(v, rt->right.tree); + } + + /* satisfaction */ + if (v->eflags®_MTRACE) + printf("successful\n"); + freedfa(d); + freedfa(d2); + subset(v, &rt->right, mid, end); + return REG_OKAY; +} + +/* + - crevdissect - determine shortest-first subexpression matches + * The retry memory stores the offset of the trial midpoint from begin, + * plus 1 so that 0 uniquely means "clean slate". 
+ ^ static int crevdissect(struct vars *, struct rtree *, chr *, chr *); + */ +static int /* regexec return code */ +crevdissect(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + struct dfa *d2; + chr *mid; + int er; + + if (rt == NULL) + return REG_OKAY; + assert(rt->op == ',' && rt->left.prefer == SHORTER); + + /* concatenation -- need to split the substring between parts */ + assert(rt->left.cnfa.nstates > 0); + assert(rt->right.cnfa.nstates > 0); + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return v->err; + d2 = newdfa(v, &rt->right.cnfa, v->g->cm); + if (ISERR()) { + freedfa(d); + return v->err; + } + if (v->eflags®_MTRACE) + printf("crev %d\n", rt->no); + + /* pick a tentative midpoint */ + if (v->mem[rt->no] == 0) { + mid = shortest(v, d, begin, begin, end); + if (mid == NULL) { + freedfa(d); + freedfa(d2); + return REG_NOMATCH; + } + if (v->eflags®_MTRACE) + printf("tentative midpoint %ld\n", (long)OFF(mid)); + subset(v, &rt->left, begin, mid); + v->mem[rt->no] = (mid - begin) + 1; + } else { + mid = begin + (v->mem[rt->no] - 1); + if (v->eflags®_MTRACE) + printf("working midpoint %ld\n", (long)OFF(mid)); + } + + /* iterate until satisfaction or failure */ + for (;;) { + /* try this midpoint on for size */ + er = cdissect(v, rt->left.tree, begin, mid); + if (er == REG_OKAY && longest(v, d2, mid, end) == end && + (er = cdissect(v, rt->right.tree, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) { + freedfa(d); + freedfa(d2); + return er; + } + + /* that midpoint didn't work, find a new one */ + if (mid == end) { + /* all possibilities exhausted */ + if (v->eflags®_MTRACE) + printf("%d no midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return REG_NOMATCH; + } + mid = shortest(v, d, begin, mid+1, end); + if (mid == NULL) { + /* failed to find a new one */ + if (v->eflags®_MTRACE) + 
printf("%d failed midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return REG_NOMATCH; + } + if (v->eflags®_MTRACE) + printf("%d: new midpoint %ld\n", rt->no, + (long)OFF(mid)); + subset(v, &rt->left, begin, mid); + v->mem[rt->no] = (mid - begin) + 1; + zapmem(v, rt->left.tree); + zapmem(v, rt->right.tree); + } + + /* satisfaction */ + if (v->eflags®_MTRACE) + printf("successful\n"); + freedfa(d); + freedfa(d2); + subset(v, &rt->right, mid, end); + return REG_OKAY; +}
+
+/*
+ - csindissect - determine singleton subexpression matches (with complications)
+ * Handles a ',' node with no right-hand side: the left subexpression alone
+ * must account for the whole of [begin, end).  The retry memory slot for the
+ * node is set to 1 once the left NFA has been confirmed to match, so that a
+ * retry skips the DFA run and goes straight to dissecting the subtree.
+ * Returns REG_OKAY, REG_NOMATCH, or an error code recorded in v->err or
+ * passed up from cdissect().
+ ^ static int csindissect(struct vars *, struct rtree *, chr *, chr *);
+ */
+static int			/* regexec return code */
+csindissect(v, rt, begin, end)
+struct vars *v;
+struct rtree *rt;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	struct dfa *d;
+	int er;
+
+	assert(rt != NULL);
+	assert(rt->op == ',');
+	assert(rt->right.cnfa.nstates == 0);
+	if (v->eflags&REG_MTRACE)
+		printf("csingleton %d\n", rt->no);
+
+	assert(rt->left.cnfa.nstates > 0);
+
+	/* exploit memory only to suppress repeated work in retries */
+	if (!v->mem[rt->no]) {
+		d = newdfa(v, &rt->left.cnfa, v->g->cm);
+		/*
+		 * newdfa() returns NULL after recording an error; bail out
+		 * as the sibling dissectors do, instead of handing a NULL
+		 * DFA to longest().
+		 */
+		if (ISERR())
+			return v->err;
+		if (longest(v, d, begin, end) != end) {
+			freedfa(d);
+			return REG_NOMATCH;
+		}
+		freedfa(d);
+		v->mem[rt->no] = 1;
+		if (v->eflags&REG_MTRACE)
+			printf("csingleton matched\n");
+	}
+
+	/* the left side spans the substring; now sort out its insides */
+	er = cdissect(v, rt->left.tree, begin, end);
+	if (er != REG_OKAY)
+		return er;
+	subset(v, &rt->left, begin, end);
+	return REG_OKAY;
+}
+
+/* + - cbrdissect - determine backref subexpression matches + ^ static int cbrdissect(struct vars *, struct rtree *, chr *, chr *); + */ +static int /* regexec return code */ +cbrdissect(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + int i; + int n = -rt->left.subno; + size_t len; + chr *paren; + chr *p; + chr *stop; + int min = rt->left.min; + int max = rt->left.max; + + assert(rt != NULL); +
assert(rt->op == 'b'); + assert(rt->right.cnfa.nstates == 0); + assert((size_t)n < v->nmatch); + + if (v->eflags®_MTRACE) + printf("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max); + + if (v->pmatch[n].rm_so == -1) + return REG_NOMATCH; + paren = v->start + v->pmatch[n].rm_so; + len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; + + /* no room to maneuver -- retries are pointless */ + if (v->mem[rt->no]) + return REG_NOMATCH; + v->mem[rt->no] = 1; + + /* special-case zero-length string */ + if (len == 0) { + if (begin == end) + return REG_OKAY; + return REG_NOMATCH; + } + + /* and too-short string */ + if ((size_t)(end - begin) < len) + return REG_NOMATCH; + stop = end - len; + + /* count occurrences */ + i = 0; + for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { + if ((*v->g->compare)(paren, p, len) != 0) + break; + i++; + } + if (v->eflags®_MTRACE) + printf("cbackref found %d\n", i); + + /* and sort it out */ + if (p != end) /* didn't consume all of it */ + return REG_NOMATCH; + if (min <= i && (i <= max || max == INFINITY)) + return REG_OKAY; + return REG_NOMATCH; /* out of range */ +} + +/* + - caltdissect - determine alternative subexpression matches (w. 
complications) + ^ static int caltdissect(struct vars *, struct rtree *, chr *, chr *); + */ +static int /* regexec return code */ +caltdissect(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + int er; +# define UNTRIED 0 /* not yet tried at all */ +# define TRYING 1 /* top matched, trying submatches */ +# define TRIED 2 /* top didn't match or submatches exhausted */ + + if (rt == NULL) + return REG_NOMATCH; + assert(rt->op == '|'); + if (v->mem[rt->no] == TRIED) + return caltdissect(v, rt->next, begin, end); + + if (v->eflags®_MTRACE) + printf("calt n%d\n", rt->no); + assert(rt->left.begin != NULL); + + if (v->mem[rt->no] == UNTRIED) { + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return v->err; + if (longest(v, d, begin, end) != end) { + freedfa(d); + v->mem[rt->no] = TRIED; + return caltdissect(v, rt->next, begin, end); + } + freedfa(d); + if (v->eflags®_MTRACE) + printf("calt matched\n"); + v->mem[rt->no] = TRYING; + } + + er = cdissect(v, rt->left.tree, begin, end); + if (er == REG_OKAY) { + subset(v, &rt->left, begin, end); + return REG_OKAY; + } + if (er != REG_NOMATCH) + return er; + + v->mem[rt->no] = TRIED; + return caltdissect(v, rt->next, begin, end); +} + +/* + - dismatch - determine overall match using top-level dissection + * The retry memory stores the offset of the trial midpoint from begin, + * plus 1 so that 0 uniquely means "clean slate". 
+ ^ static chr *dismatch(struct vars *, struct rtree *, chr *, chr *); + */ +static chr * /* endpoint, or NULL */ +dismatch(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + struct dfa *d2; + chr *mid; + chr *ret; + + if (rt == NULL) + return begin; + if (v->eflags®_MTRACE) + printf("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + + /* punt various cases to auxiliaries */ + if (rt->right.cnfa.nstates == 0) /* no RHS */ + return dismsin(v, rt, begin, end); + if (rt->left.prefer == SHORTER) /* reverse scan */ + return dismrev(v, rt, begin, end); + + /* concatenation -- need to split the substring between parts */ + assert(rt->op == ','); + assert(rt->left.cnfa.nstates > 0); + assert(rt->right.cnfa.nstates > 0); + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return NULL; + d2 = newdfa(v, &rt->right.cnfa, v->g->cm); + if (ISERR()) { + freedfa(d); + return NULL; + } + if (v->eflags®_MTRACE) + printf("dconcat %d\n", rt->no); + + /* pick a tentative midpoint */ + if (v->mem[rt->no] == 0) { + mid = longest(v, d, begin, end); + if (mid == NULL) { + freedfa(d); + freedfa(d2); + return NULL; + } + if (v->eflags®_MTRACE) + printf("tentative midpoint %ld\n", (long)OFF(mid)); + v->mem[rt->no] = (mid - begin) + 1; + } else { + mid = begin + (v->mem[rt->no] - 1); + if (v->eflags®_MTRACE) + printf("working midpoint %ld\n", (long)OFF(mid)); + } + + /* iterate until satisfaction or failure */ + for (;;) { + /* try this midpoint on for size */ + if (rt->right.tree == NULL || rt->right.tree->op == 'b') { + if (rt->right.prefer == LONGER) + ret = longest(v, d2, mid, end); + else + ret = shortest(v, d2, mid, mid, end); + } else { + if (longest(v, d2, mid, end) != NULL) + ret = dismatch(v, rt->right.tree, mid, end); + else + ret = NULL; + } + if (ret != NULL) + break; /* NOTE BREAK OUT */ + + /* that midpoint didn't work, find a new one */ + if (mid == begin) { 
+ /* all possibilities exhausted */ + if (v->eflags®_MTRACE) + printf("%d no midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return NULL; + } + mid = longest(v, d, begin, mid-1); + if (mid == NULL) { + /* failed to find a new one */ + if (v->eflags®_MTRACE) + printf("%d failed midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return NULL; + } + if (v->eflags®_MTRACE) + printf("%d: new midpoint %ld\n", rt->no, + (long)OFF(mid)); + v->mem[rt->no] = (mid - begin) + 1; + zapmem(v, rt->right.tree); + } + + /* satisfaction */ + if (v->eflags®_MTRACE) + printf("successful\n"); + freedfa(d); + freedfa(d2); + return ret; +} + +/* + - dismrev - determine overall match using top-level dissection + * The retry memory stores the offset of the trial midpoint from begin, + * plus 1 so that 0 uniquely means "clean slate". + ^ static chr *dismrev(struct vars *, struct rtree *, chr *, chr *); + */ +static chr * /* endpoint, or NULL */ +dismrev(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + struct dfa *d2; + chr *mid; + chr *ret; + + if (rt == NULL) + return begin; + if (v->eflags®_MTRACE) + printf("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + + /* concatenation -- need to split the substring between parts */ + assert(rt->op == ','); + assert(rt->left.cnfa.nstates > 0); + assert(rt->right.cnfa.nstates > 0); + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (ISERR()) + return NULL; + d2 = newdfa(v, &rt->right.cnfa, v->g->cm); + if (ISERR()) { + freedfa(d); + return NULL; + } + if (v->eflags®_MTRACE) + printf("dconcat %d\n", rt->no); + + /* pick a tentative midpoint */ + if (v->mem[rt->no] == 0) { + mid = shortest(v, d, begin, begin, end); + if (mid == NULL) { + freedfa(d); + freedfa(d2); + return NULL; + } + if (v->eflags®_MTRACE) + printf("tentative midpoint %ld\n", (long)OFF(mid)); + v->mem[rt->no] = (mid - begin) + 1; + } else { + mid = begin + 
(v->mem[rt->no] - 1); + if (v->eflags®_MTRACE) + printf("working midpoint %ld\n", (long)OFF(mid)); + } + + /* iterate until satisfaction or failure */ + for (;;) { + /* try this midpoint on for size */ + if (rt->right.tree == NULL || rt->right.tree->op == 'b') { + if (rt->right.prefer == LONGER) + ret = longest(v, d2, mid, end); + else + ret = shortest(v, d2, mid, mid, end); + } else { + if (longest(v, d2, mid, end) != NULL) + ret = dismatch(v, rt->right.tree, mid, end); + else + ret = NULL; + } + if (ret != NULL) + break; /* NOTE BREAK OUT */ + + /* that midpoint didn't work, find a new one */ + if (mid == end) { + /* all possibilities exhausted */ + if (v->eflags®_MTRACE) + printf("%d no midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return NULL; + } + mid = shortest(v, d, begin, mid+1, end); + if (mid == NULL) { + /* failed to find a new one */ + if (v->eflags®_MTRACE) + printf("%d failed midpoint\n", rt->no); + freedfa(d); + freedfa(d2); + return NULL; + } + if (v->eflags®_MTRACE) + printf("%d: new midpoint %ld\n", rt->no, + (long)OFF(mid)); + v->mem[rt->no] = (mid - begin) + 1; + zapmem(v, rt->right.tree); + } + + /* satisfaction */ + if (v->eflags®_MTRACE) + printf("successful\n"); + freedfa(d); + freedfa(d2); + return ret; +} + +/* + - dismsin - determine singleton subexpression matches (with complications) + ^ static chr *dismsin(struct vars *, struct rtree *, chr *, chr *); + */ +static chr * +dismsin(v, rt, begin, end) +struct vars *v; +struct rtree *rt; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ +{ + struct dfa *d; + chr *ret; + + assert(rt != NULL); + assert(rt->op == ','); + assert(rt->right.cnfa.nstates == 0); + if (v->eflags®_MTRACE) + printf("dsingleton %d\n", rt->no); + + assert(rt->left.cnfa.nstates > 0); + + /* retries are pointless */ + if (v->mem[rt->no]) + return NULL; + v->mem[rt->no] = 1; + + d = newdfa(v, &rt->left.cnfa, v->g->cm); + if (d == NULL) + return NULL; + if (rt->left.prefer == LONGER) + 
ret = longest(v, d, begin, end); + else + ret = shortest(v, d, begin, begin, end); + freedfa(d); + if (ret != NULL && (v->eflags®_MTRACE)) + printf("dsingleton matched\n"); + return ret; +}
+
+/*
+ - longest - longest-preferred matching engine
+ * Runs the lazy DFA forward from start, remembering in each state set's
+ * lastseen field the position at which it was most recently entered, then
+ * reports the latest position at which a POSTSTATE set was seen.
+ * Returns the end of the longest match, or NULL if there is none.
+ ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *);
+ */
+static chr *			/* endpoint, or NULL */
+longest(v, d, start, stop)
+struct vars *v;			/* used only for debug and exec flags */
+struct dfa *d;
+chr *start;			/* where the match should start */
+chr *stop;			/* match must end at or before here */
+{
+	chr *cp;
+	/*
+	 * Unless stop is the true end of string, scan one chr beyond it
+	 * (presumably so the chr following the match can act as trailing
+	 * context; the result is reported as post - 1 below).
+	 */
+	chr *realstop = (stop == v->stop) ? stop : stop + 1;
+	color co;
+	struct sset *css;
+	struct sset *ss;
+	chr *post;
+	int i;
+	struct colormap *cm = d->cm;
+
+	/* initialize */
+	css = initialize(v, d, start);
+	cp = start;
+
+	/* startup: feed the color of whatever precedes the start point */
+	if (v->eflags&REG_FTRACE)
+		printf("+++ startup +++\n");
+	if (cp == v->start) {
+		co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+		if (v->eflags&REG_FTRACE)
+			printf("color %ld\n", (long)co);
+	} else {
+		co = getcolor(cm, *(cp - 1));
+		if (v->eflags&REG_FTRACE)
+			printf("char %c, color %ld\n", (char)*(cp-1), (long)co);
+	}
+	css = miss(v, d, css, co, cp);
+	if (css == NULL)
+		return NULL;
+	css->lastseen = cp;
+
+	/*
+	 * main loop -- the traced and untraced copies must stay in step
+	 * apart from the printfs, so both hand miss() the position reached
+	 * after consuming *cp (cp+1); tracing must not change the outcome.
+	 */
+	if (v->eflags&REG_FTRACE)
+		while (cp < realstop) {
+			/* pointer difference is ptrdiff_t; narrow it for %d */
+			printf("+++ at c%d +++\n", (int)(css - d->ssets));
+			co = getcolor(cm, *cp);
+			printf("char %c, color %ld\n", (char)*cp, (long)co);
+			ss = css->outs[co];
+			if (ss == NULL) {
+				ss = miss(v, d, css, co, cp+1);
+				if (ss == NULL)
+					break;	/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+	else
+		while (cp < realstop) {
+			co = getcolor(cm, *cp);
+			ss = css->outs[co];
+			if (ss == NULL) {
+				ss = miss(v, d, css, co, cp+1);
+				if (ss == NULL)
+					break;	/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+
+	/* shutdown: at the true end of string, feed the end-of-string color */
+	if (v->eflags&REG_FTRACE)
+		printf("+++ shutdown at c%d +++\n", (int)(css - d->ssets));
+	if (cp == v->stop && stop == v->stop) {
+		co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+		if (v->eflags&REG_FTRACE)
+			printf("color %ld\n", (long)co);
+		ss = miss(v, d, css, co, cp);
+		/* special case: match ended at eol? */
+		if (ss != NULL && (ss->flags&POSTSTATE))
+			return cp;
+		else if (ss != NULL)
+			ss->lastseen = cp;	/* to be tidy */
+	}
+
+	/* find last match, if any */
+	post = d->lastpost;
+	for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
+		if ((ss->flags&POSTSTATE) && post != ss->lastseen &&
+					(post == NULL || post < ss->lastseen))
+			post = ss->lastseen;
+	if (post != NULL)		/* found one */
+		return post - 1;
+
+	return NULL;
+}
+
+/* + - shortest - shortest-preferred matching engine + ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *); + */ +static chr * /* endpoint, or NULL */ +shortest(v, d, start, min, max) +struct vars *v; /* used only for debug and exec flags */ +struct dfa *d; +chr *start; /* where the match should start */ +chr *min; /* match must end at or after here */ +chr *max; /* match must end at or before here */ +{ + chr *cp; + chr *realmin = (min == v->stop) ? min : min + 1; + chr *realmax = (max == v->stop) ? max : max + 1; + color co; + struct sset *css; + struct sset *ss = NULL; + struct colormap *cm = d->cm; + + /* initialize */ + css = initialize(v, d, start); + cp = start; + + /* startup */ + if (v->eflags®_FTRACE) + printf("--- startup ---\n"); + if (cp == v->start) { + co = d->cnfa->bos[(v->eflags®_NOTBOL) ?
0 : 1]; + if (v->eflags&REG_FTRACE) + printf("color %ld\n", (long)co); + } else { + co = getcolor(cm, *(cp - 1)); + if (v->eflags&REG_FTRACE) + printf("char %c, color %ld\n", (char)*(cp-1), (long)co); + } + css = miss(v, d, css, co, cp); + if (css == NULL) + return NULL; + css->lastseen = cp; + + /* main loop */ + if (v->eflags&REG_FTRACE) + while (cp < realmax) { + printf("--- at c%d ---\n", css - d->ssets); + co = getcolor(cm, *cp); + printf("char %c, color %ld\n", (char)*cp, (long)co); + ss = css->outs[co]; + if (ss == NULL) { + ss = miss(v, d, css, co, cp); + if (ss == NULL) + break; /* NOTE BREAK OUT */ + } + cp++; + ss->lastseen = cp; + css = ss; + if ((ss->flags&POSTSTATE) && cp >= realmin) + break; /* NOTE BREAK OUT */ + } + else + while (cp < realmax) { + co = getcolor(cm, *cp); + ss = css->outs[co]; + if (ss == NULL) { + ss = miss(v, d, css, co, cp+1); + if (ss == NULL) + break; /* NOTE BREAK OUT */ + } + cp++; + ss->lastseen = cp; + css = ss; + if ((ss->flags&POSTSTATE) && cp >= realmin) + break; /* NOTE BREAK OUT */ + } + + if (ss == NULL) + return NULL; + if (ss->flags&POSTSTATE) { + assert(cp >= realmin); + return cp - 1; + } + + /* shutdown */ + if (v->eflags&REG_FTRACE) + printf("--- shutdown at c%d ---\n", css - d->ssets); + if (cp == v->stop && max == v->stop) { + co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1]; + if (v->eflags&REG_FTRACE) + printf("color %ld\n", (long)co); + ss = miss(v, d, css, co, cp); + /* special case: match ended at eol? 
*/ + if (ss != NULL && (ss->flags&POSTSTATE)) + return cp; + } + + return NULL; +} + +/* + - newdfa - set up a fresh DFA + ^ static struct dfa *newdfa(struct vars *, struct cnfa *, + ^ struct colormap *); + */ +static struct dfa * +newdfa(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; +{ + struct dfa *d = (struct dfa *)ckalloc(sizeof(struct dfa)); + int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; + struct sset *ss; + int i; + + assert(cnfa != NULL && cnfa->nstates != 0); + if (d == NULL) { + ERR(REG_ESPACE); + return NULL; + } + + d->ssets = (struct sset *)ckalloc(CACHE * sizeof(struct sset)); + d->statesarea = (unsigned *)ckalloc((CACHE+WORK) * wordsper * + sizeof(unsigned)); + d->work = &d->statesarea[CACHE * wordsper]; + d->outsarea = (struct sset **)ckalloc(CACHE * cnfa->ncolors * + sizeof(struct sset *)); + d->incarea = (struct arcp *)ckalloc(CACHE * cnfa->ncolors * + sizeof(struct arcp)); + if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL || + d->incarea == NULL) { + freedfa(d); + ERR(REG_ESPACE); + return NULL; + } + + d->nssets = (v->eflags&REG_SMALL) ? 
5 : CACHE; + d->nssused = 0; + d->nstates = cnfa->nstates; + d->ncolors = cnfa->ncolors; + d->wordsper = wordsper; + d->cnfa = cnfa; + d->cm = cm; + d->lastpost = NULL; + + for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) { + /* initialization of most fields is done as needed */ + ss->states = &d->statesarea[i * d->wordsper]; + ss->outs = &d->outsarea[i * d->ncolors]; + ss->inchain = &d->incarea[i * d->ncolors]; + } + + return d; +} + +/* + - freedfa - free a DFA + ^ static VOID freedfa(struct dfa *); + */ +static VOID +freedfa(d) +struct dfa *d; +{ + if (d->ssets != NULL) + ckfree((char *)d->ssets); + if (d->statesarea != NULL) + ckfree((char *)d->statesarea); + if (d->outsarea != NULL) + ckfree((char *)d->outsarea); + if (d->incarea != NULL) + ckfree((char *)d->incarea); + ckfree((char *)d); +} + +/* + - hash - construct a hash code for a bitvector + * There are probably better ways, but they're more expensive. + ^ static unsigned hash(unsigned *, int); + */ +static unsigned +hash(uv, n) +unsigned *uv; +int n; +{ + int i; + unsigned h; + + h = 0; + for (i = 0; i < n; i++) + h ^= uv[i]; + return h; +} + +/* + - initialize - hand-craft a cache entry for startup, otherwise get ready + ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); + */ +static struct sset * +initialize(v, d, start) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +chr *start; +{ + struct sset *ss; + int i; + + /* is previous one still there? 
*/ + if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) + ss = &d->ssets[0]; + else { /* no, must (re)build it */ + ss = getvacant(v, d); + for (i = 0; i < d->wordsper; i++) + ss->states[i] = 0; + BSET(ss->states, d->cnfa->pre); + ss->hash = hash(ss->states, d->wordsper); + assert(d->cnfa->pre != d->cnfa->post); + ss->flags = STARTER; + /* lastseen dealt with below */ + } + + for (i = 0; i < d->nssused; i++) + d->ssets[i].lastseen = NULL; + ss->lastseen = start; /* maybe untrue, but harmless */ + d->lastpost = NULL; + return ss; +} + +/* + - miss - handle a cache miss + ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, + ^ pcolor, chr *); + */ +static struct sset * /* NULL if goes to empty set */ +miss(v, d, css, co, cp) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +struct sset *css; +pcolor co; +chr *cp; /* next chr */ +{ + struct cnfa *cnfa = d->cnfa; + int i; + unsigned h; + struct carc *ca; + struct sset *p; + int ispost; + int gotstate; + int dolacons; + int didlacons; + + /* for convenience, we can be called even if it might not be a miss */ + if (css->outs[co] != NULL) { + if (v->eflags&REG_FTRACE) + printf("hit\n"); + return css->outs[co]; + } + if (v->eflags&REG_FTRACE) + printf("miss\n"); + + /* first, what set of states would we end up in? */ + for (i = 0; i < d->wordsper; i++) + d->work[i] = 0; + ispost = 0; + gotstate = 0; + for (i = 0; i < d->nstates; i++) + if (ISBSET(css->states, i)) + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) + if (ca->co == co) { + BSET(d->work, ca->to); + gotstate = 1; + if (ca->to == cnfa->post) + ispost = 1; + if (v->eflags&REG_FTRACE) + printf("%d -> %d\n", i, ca->to); + } + dolacons = (gotstate) ? 
cnfa->haslacons : 0; + didlacons = 0; + while (dolacons) { /* transitive closure */ + dolacons = 0; + for (i = 0; i < d->nstates; i++) + if (ISBSET(d->work, i)) + for (ca = cnfa->states[i]; ca->co != COLORLESS; + ca++) + if (ca->co > cnfa->ncolors && + !ISBSET(d->work, ca->to) && + lacon(v, cnfa, cp, + ca->co)) { + BSET(d->work, ca->to); + dolacons = 1; + didlacons = 1; + if (ca->to == cnfa->post) + ispost = 1; + if (v->eflags&REG_FTRACE) + printf("%d :-> %d\n", + i, ca->to); + } + } + if (!gotstate) + return NULL; + h = hash(d->work, d->wordsper); + + /* next, is that in the cache? */ + for (p = d->ssets, i = d->nssused; i > 0; p++, i--) + if (p->hash == h && memcmp((VOID *)d->work, (VOID *)p->states, + d->wordsper*sizeof(unsigned)) == 0) { + if (v->eflags&REG_FTRACE) + printf("cached c%d\n", p - d->ssets); + break; /* NOTE BREAK OUT */ + } + if (i == 0) { /* nope, need a new cache entry */ + p = getvacant(v, d); + assert(p != css); + for (i = 0; i < d->wordsper; i++) + p->states[i] = d->work[i]; + p->hash = h; + p->flags = (ispost) ? POSTSTATE : 0; + /* lastseen to be dealt with by caller */ + } + + if (!didlacons) { /* lookahead conds. always cache miss */ + css->outs[co] = p; + css->inchain[co] = p->ins; + p->ins.ss = css; + p->ins.co = (color) co; + } + return p; +} + +/* + - lacon - lookahead-constraint checker for miss() + ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); + */ +static int /* predicate: constraint satisfied? 
*/ +lacon(v, pcnfa, precp, co) +struct vars *v; +struct cnfa *pcnfa; /* parent cnfa */ +chr *precp; /* points to previous chr */ +pcolor co; /* "color" of the lookahead constraint */ +{ + int n; + struct subre *sub; + struct dfa *d; + chr *end; + + n = co - pcnfa->ncolors; + assert(n < v->g->nlacons && v->g->lacons != NULL); + if (v->eflags&REG_FTRACE) + printf("=== testing lacon %d\n", n); + sub = &v->g->lacons[n]; + d = newdfa(v, &sub->cnfa, v->g->cm); + if (d == NULL) { + ERR(REG_ESPACE); + return 0; + } + end = longest(v, d, precp, v->stop); + freedfa(d); + if (v->eflags&REG_FTRACE) + printf("=== lacon %d match %d\n", n, (end != NULL)); + return (sub->subno) ? (end != NULL) : (end == NULL); +} + +/* + - getvacant - get a vacant state set + * This routine clears out the inarcs and outarcs, but does not otherwise + * clear the innards of the state set -- that's up to the caller. + ^ static struct sset *getvacant(struct vars *, struct dfa *); + */ +static struct sset * +getvacant(v, d) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +{ + int i; + struct sset *ss; + struct sset *p; + struct arcp ap; + struct arcp lastap; + color co; + + ss = pickss(v, d); + + /* clear out its inarcs, including self-referential ones */ + ap = ss->ins; + while ((p = ap.ss) != NULL) { + co = ap.co; + if (v->eflags&REG_FTRACE) + printf("zapping c%d's %ld outarc\n", p - d->ssets, + (long)co); + p->outs[co] = NULL; + ap = p->inchain[co]; + p->inchain[co].ss = NULL; /* paranoia */ + } + ss->ins.ss = NULL; + + /* take it off the inarc chains of the ssets reached by its outarcs */ + for (i = 0; i < d->ncolors; i++) { + p = ss->outs[i]; + assert(p != ss); /* not self-referential */ + if (p == NULL) + continue; /* NOTE CONTINUE */ + if (v->eflags&REG_FTRACE) + printf("deleting outarc %d from c%d's inarc chain\n", + i, p - d->ssets); + if (p->ins.ss == ss && p->ins.co == i) + p->ins = ss->inchain[i]; + else { + assert(p->ins.ss != NULL); + for (ap = p->ins; ap.ss != NULL && + !(ap.ss == 
ss && ap.co == i); + ap = ap.ss->inchain[ap.co]) + lastap = ap; + assert(ap.ss != NULL); + lastap.ss->inchain[lastap.co] = ss->inchain[i]; + } + ss->outs[i] = NULL; + ss->inchain[i].ss = NULL; + } + + /* if ss was a success state, may need to remember location */ + if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && + (d->lastpost == NULL || d->lastpost < ss->lastseen)) + d->lastpost = ss->lastseen; + + return ss; +} + +/* + - pickss - pick the next stateset to be used + ^ static struct sset *pickss(struct vars *, struct dfa *); + */ +static struct sset * +pickss(v, d) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +{ + int i; + struct sset *ss; + struct sset *oldest; + + /* shortcut for cases where cache isn't full */ + if (d->nssused < d->nssets) { + ss = &d->ssets[d->nssused]; + d->nssused++; + if (v->eflags&REG_FTRACE) + printf("new c%d\n", ss - d->ssets); + /* must make innards consistent */ + ss->ins.ss = NULL; + for (i = 0; i < d->ncolors; i++) { + ss->outs[i] = NULL; + ss->inchain[i].ss = NULL; + } + ss->flags = 0; + return ss; + } + + /* look for oldest */ + oldest = d->ssets; + for (ss = d->ssets, i = d->nssets; i > 0; ss++, i--) { + if (ss->lastseen != oldest->lastseen && (ss->lastseen == NULL || + ss->lastseen < oldest->lastseen)) + oldest = ss; + } + if (v->eflags&REG_FTRACE) + printf("replacing c%d\n", oldest - d->ssets); + return oldest; +} + +#define EXEC 1 +#include "color.c" diff --git a/generic/guts.h b/generic/guts.h new file mode 100644 index 0000000..9a1c4eb --- /dev/null +++ b/generic/guts.h @@ -0,0 +1,233 @@ +/* + * guts.h -- + * + * Regexp package file: Misc. utilities. + * + * Copyright (c) 1998 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., and Sun Microsystems Inc., none of + * whom are responsible for the results. The author thanks all of them. 
+ * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (c) 1998 by Sun Microsystems, Inc. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * SCCS: @(#) guts.h 1.7 98/01/21 14:33:04 + */ + +#include "tclInt.h" + +#define NOTREACHED 0 +#define xxx 1 + +#ifndef _POSIX2_RE_DUP_MAX +#define _POSIX2_RE_DUP_MAX 255 +#endif +#define DUPMAX _POSIX2_RE_DUP_MAX +#define INFINITY (DUPMAX+1) + +/* bitmap manipulation */ +#define UBITS (CHAR_BIT * sizeof(unsigned)) +#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) +#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) + +/* + * Map a truth value into -1 for false, 1 for true. This is so it is + * possible to write compile-time assertions by declaring a dummy array + * of this size. (Why not #if? Because sizeof is not available there.) + */ +#define NEGIFNOT(x) (2*!!(x) - 1) /* !! 
ensures 0 or 1 */ + +/* + * We dissect a chr into byts for colormap table indexing. Here we define + * a byt, which will be the same as a byte on most machines... The exact + * size of a byt is not critical, but about 8 bits is good, and extraction + * of 8-bit chunks is sometimes especially fast. + * + * Changes in several places are needed to handle an increase in MAXBYTS. + * Those places check whether MAXBYTS is larger than they expect. + */ +#ifndef BYTBITS +#define BYTBITS 8 /* bits in a byt */ +#endif +#define BYTTAB (1<