diff options
Diffstat (limited to 'Doc')
-rw-r--r-- | Doc/partparse.py | 3412 | ||||
-rw-r--r-- | Doc/tools/partparse.py | 3412 |
2 files changed, 3398 insertions, 3426 deletions
diff --git a/Doc/partparse.py b/Doc/partparse.py index db53a01..2f072bb 100644 --- a/Doc/partparse.py +++ b/Doc/partparse.py @@ -27,61 +27,61 @@ MODE_MATH = 4 MODE_DMATH = 5 MODE_GOBBLEWHITE = 6 -the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \ - MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE +the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, + MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE) # Show the neighbourhood of the scanned buffer def epsilon(buf, where): - wmt, wpt = where - 10, where + 10 - if wmt < 0: - wmt = 0 - if wpt > len(buf): - wpt = len(buf) - return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.' + wmt, wpt = where - 10, where + 10 + if wmt < 0: + wmt = 0 + if wpt > len(buf): + wpt = len(buf) + return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.' # Should return the line number. never worked def lin(): - global lineno - return ' Line ' + `lineno` + '.' + global lineno + return ' Line ' + `lineno` + '.' # Displays the recursion level. def lv(lvl): - return ' Level ' + `lvl` + '.' + return ' Level ' + `lvl` + '.' # Combine the three previous functions. Used often. def lle(lvl, buf, where): - return lv(lvl) + lin() + epsilon(buf, where) - - + return lv(lvl) + lin() + epsilon(buf, where) + + # This class is only needed for _symbolic_ representation of the parse mode. class Mode: - def __init__(self, arg): - if arg not in the_modes: - raise ValueError, 'mode not in the_modes' - self.mode = arg - - def __cmp__(self, other): - if type(self) != type(other): - other = mode(other) - return cmp(self.mode, other.mode) - - def __repr__(self): - if self.mode == MODE_REGULAR: - return 'MODE_REGULAR' - elif self.mode == MODE_VERBATIM: - return 'MODE_VERBATIM' - elif self.mode == MODE_CS_SCAN: - return 'MODE_CS_SCAN' - elif self.mode == MODE_COMMENT: - return 'MODE_COMMENT' - elif self.mode == MODE_MATH: - return 'MODE_MATH' - elif self.mode == MODE_DMATH: - return 'MODE_DMATH' - elif self.mode == MODE_GOBBLEWHITE: - return 'MODE_GOBBLEWHITE' - else: - raise ValueError, 'mode not in the_modes' + def __init__(self, arg): + if arg not in the_modes: + raise ValueError, 'mode not in the_modes' + self.mode = arg + + def __cmp__(self, other): + if type(self) != type(other): + other = mode(other) + return cmp(self.mode, other.mode) + + def __repr__(self): + if self.mode == MODE_REGULAR: + return 'MODE_REGULAR' + elif self.mode == MODE_VERBATIM: + return 'MODE_VERBATIM' + elif self.mode == MODE_CS_SCAN: + return 'MODE_CS_SCAN' + elif self.mode == MODE_COMMENT: + return 'MODE_COMMENT' + elif self.mode == MODE_MATH: + return 'MODE_MATH' + elif self.mode == MODE_DMATH: + return 'MODE_DMATH' + elif self.mode == MODE_GOBBLEWHITE: + return 'MODE_GOBBLEWHITE' + else: + raise ValueError, 'mode not in the_modes' # just a wrapper around a class initialisation mode = Mode @@ -106,88 +106,88 @@ GOBBLEDWHITE = 8 # Gobbled LWSP, after CSNAME ENDLINE = 9 # END-OF-LINE, data = '\n' DENDLINE = 10 # DOUBLE EOL, data='\n', indicates \par ENV = 11 # LaTeX-environment - # data =(envname,[ch,ch,ch,.]) + # data =(envname,[ch,ch,ch,.]) CSLINE = 12 # for texi: next chunk will be one group - # of args. Will be set all on 1 line + # of args. Will be set all on 1 line IGNORE = 13 # IGNORE this data ENDENV = 14 # TEMP END OF GROUP INDICATOR IF = 15 # IF-directive - # data = (flag,negate,[ch, ch, ch,...]) -the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \ - GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF + # data = (flag,negate,[ch, ch, ch,...]) +the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, + GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF) # class, just to display symbolic name class ChunkType: - def __init__(self, chunk_type): - if chunk_type not in the_types: - raise ValueError, 'chunk_type not in the_types' - self.chunk_type = chunk_type - - def __cmp__(self, other): - if type(self) != type(other): - other = chunk_type(other) - return cmp(self.chunk_type, other.chunk_type) - - def __repr__(self): - if self.chunk_type == PLAIN: - return 'PLAIN' - elif self.chunk_type == GROUP: - return 'GROUP' - elif self.chunk_type == CSNAME: - return 'CSNAME' - elif self.chunk_type == COMMENT: - return 'COMMENT' - elif self.chunk_type == DMATH: - return 'DMATH' - elif self.chunk_type == MATH: - return 'MATH' - elif self.chunk_type == OTHER: - return 'OTHER' - elif self.chunk_type == ACTIVE: - return 'ACTIVE' - elif self.chunk_type == GOBBLEDWHITE: - return 'GOBBLEDWHITE' - elif self.chunk_type == DENDLINE: - return 'DENDLINE' - elif self.chunk_type == ENDLINE: - return 'ENDLINE' - elif self.chunk_type == ENV: - return 'ENV' - elif self.chunk_type == CSLINE: - return 'CSLINE' - elif self.chunk_type == IGNORE: - return 'IGNORE' - elif self.chunk_type == ENDENV: - return 'ENDENV' - elif self.chunk_type == IF: - return 'IF' - else: - raise ValueError, 'chunk_type not in the_types' + def __init__(self, chunk_type): + if chunk_type not in the_types: + raise ValueError, 'chunk_type not in the_types' + self.chunk_type = chunk_type + + def __cmp__(self, other): + if type(self) != type(other): + other = chunk_type(other) + return cmp(self.chunk_type, other.chunk_type) + + def __repr__(self): + if self.chunk_type == PLAIN: + return 'PLAIN' + elif self.chunk_type == GROUP: + return 'GROUP' + elif self.chunk_type == CSNAME: + return 'CSNAME' + elif self.chunk_type == COMMENT: + return 'COMMENT' + elif self.chunk_type == DMATH: + return 'DMATH' + elif self.chunk_type == MATH: + return 'MATH' + elif self.chunk_type == OTHER: + return 'OTHER' + elif self.chunk_type == ACTIVE: + return 'ACTIVE' + elif self.chunk_type == GOBBLEDWHITE: + return 'GOBBLEDWHITE' + elif self.chunk_type == DENDLINE: + return 'DENDLINE' + elif self.chunk_type == ENDLINE: + return 'ENDLINE' + elif self.chunk_type == ENV: + return 'ENV' + elif self.chunk_type == CSLINE: + return 'CSLINE' + elif self.chunk_type == IGNORE: + return 'IGNORE' + elif self.chunk_type == ENDENV: + return 'ENDENV' + elif self.chunk_type == IF: + return 'IF' + else: + raise ValueError, 'chunk_type not in the_types' # ...and the wrapper _all_chunk_types = {} for t in the_types: - _all_chunk_types[t] = ChunkType(t) + _all_chunk_types[t] = ChunkType(t) def chunk_type(t): - return _all_chunk_types[t] + return _all_chunk_types[t] # store a type object of the ChunkType-class-instance... chunk_type_type = type(chunk_type(0)) # this class contains a part of the parsed buffer class Chunk: - def __init__(self, chtype, where, data): - if type(chtype) != chunk_type_type: - chtype = chunk_type(chtype) - self.chtype = chtype - if type(where) != IntType: - raise TypeError, '\'where\' is not a number' - self.where = where - self.data = data - - def __repr__(self): - return 'chunk' + `self.chtype, self.where, self.data` + def __init__(self, chtype, where, data): + if type(chtype) != chunk_type_type: + chtype = chunk_type(chtype) + self.chtype = chtype + if type(where) != IntType: + raise TypeError, "'where' is not a number" + self.where = where + self.data = data + + def __repr__(self): + return 'chunk' + `self.chtype, self.where, self.data` # and the wrapper chunk = Chunk @@ -216,40 +216,40 @@ CC_COMMENT = 14 CC_INVALID = 15 # and the names -cc_names = [\ - 'CC_ESCAPE', \ - 'CC_LBRACE', \ - 'CC_RBRACE', \ - 'CC_MATHSHIFT', \ - 'CC_ALIGNMENT', \ - 'CC_ENDLINE', \ - 'CC_PARAMETER', \ - 'CC_SUPERSCRIPT', \ - 'CC_SUBSCRIPT', \ - 'CC_IGNORE', \ - 'CC_WHITE', \ - 'CC_LETTER', \ - 'CC_OTHER', \ - 'CC_ACTIVE', \ - 'CC_COMMENT', \ - 'CC_INVALID', \ +cc_names = [ + 'CC_ESCAPE', + 'CC_LBRACE', + 'CC_RBRACE', + 'CC_MATHSHIFT', + 'CC_ALIGNMENT', + 'CC_ENDLINE', + 'CC_PARAMETER', + 'CC_SUPERSCRIPT', + 'CC_SUBSCRIPT', + 'CC_IGNORE', + 'CC_WHITE', + 'CC_LETTER', + 'CC_OTHER', + 'CC_ACTIVE', + 'CC_COMMENT', + 'CC_INVALID', ] # Show a list of catcode-name-symbols def pcl(codelist): - result = '' - for i in codelist: - result = result + cc_names[i] + ', ' - return '[' + result[:-2] + ']' + result = '' + for i in codelist: + result = result + cc_names[i] + ', ' + return '[' + result[:-2] + ']' # the name of the catcode (ACTIVE, OTHER, etc.) def pc(code): - return cc_names[code] - + return cc_names[code] + # Which catcodes make the parser stop parsing regular plaintext -regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \ - CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \ +regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, + CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE] # same for scanning a control sequence name @@ -269,37 +269,37 @@ comment_stopcodes = [CC_ENDLINE] # gather all characters together, specified by a list of catcodes def code2string(cc, codelist): - ##print 'code2string: codelist = ' + pcl(codelist), - result = '' - for category in codelist: - if cc[category]: - result = result + cc[category] - ##print 'result = ' + `result` - return result + ##print 'code2string: codelist = ' + pcl(codelist), + result = '' + for category in codelist: + if cc[category]: + result = result + cc[category] + ##print 'result = ' + `result` + return result # automatically generate all characters of catcode other, being the # complement set in the ASCII range (128 characters) def make_other_codes(cc): - otherchars = range(256) # could be made 256, no problem - for category in all_but_other_codes: - if cc[category]: - for c in cc[category]: - otherchars[ord(c)] = None - result = '' - for i in otherchars: - if i != None: - result = result + chr(i) - return result + otherchars = range(256) # could be made 256, no problem + for category in all_but_other_codes: + if cc[category]: + for c in cc[category]: + otherchars[ord(c)] = None + result = '' + for i in otherchars: + if i != None: + result = result + chr(i) + return result # catcode dump (which characters have which catcodes). def dump_cc(name, cc): - ##print '\t' + name - ##print '=' * (8+len(name)) - if len(cc) != 16: - raise TypeError, 'cc not good cat class' + ##print '\t' + name + ##print '=' * (8+len(name)) + if len(cc) != 16: + raise TypeError, 'cc not good cat class' ## for i in range(16): ## print pc(i) + '\t' + `cc[i]` - + # In the beginning,.... epoch_cc = [None] * 16 @@ -316,7 +316,7 @@ initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F' #initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway ##dump_cc('initex_cc', initex_cc) - + # LPLAIN: LaTeX catcode setting (see lplain.tex) lplain_cc = initex_cc[:] lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}' @@ -330,7 +330,7 @@ lplain_cc[CC_ACTIVE] = '~\x0C' # '~' and C-l lplain_cc[CC_OTHER] = make_other_codes(lplain_cc) ##dump_cc('lplain_cc', lplain_cc) - + # Guido's LaTeX environment catcoded '_' as ``other'' # my own purpose catlist my_cc = lplain_cc[:] @@ -344,27 +344,27 @@ dump_cc('my_cc', my_cc) re_meaning = '\\[]^$' def un_re(str): - result = '' - for i in str: - if i in re_meaning: - result = result + '\\' - result = result + i - return result - + result = '' + for i in str: + if i in re_meaning: + result = result + '\\' + result = result + i + return result + # NOTE the negate ('^') operator in *some* of the regexps below def make_rc_regular(cc): - # problems here if '[]' are included!! - return regex.compile('[' + code2string(cc, regular_stopcodes) + ']') + # problems here if '[]' are included!! + return regex.compile('[' + code2string(cc, regular_stopcodes) + ']') def make_rc_cs_scan(cc): - return regex.compile('[^' + code2string(cc, csname_scancodes) + ']') + return regex.compile('[^' + code2string(cc, csname_scancodes) + ']') def make_rc_comment(cc): - return regex.compile('[' + code2string(cc, comment_stopcodes) + ']') + return regex.compile('[' + code2string(cc, comment_stopcodes) + ']') def make_rc_endwhite(cc): - return regex.compile('[^' + code2string(cc, white_scancodes) + ']') - + return regex.compile('[^' + code2string(cc, white_scancodes) + ']') + # regular: normal mode: @@ -386,213 +386,205 @@ rc_endwhite = make_rc_endwhite(my_cc) # This has been done in order to better check for environment-mismatches def parseit(buf, *rest): - global lineno - - if len(rest) == 3: - parsemode, start, lvl = rest - elif len(rest) == 2: - parsemode, start, lvl = rest + (0, ) - elif len(rest) == 1: - parsemode, start, lvl = rest + (0, 0) - elif len(rest) == 0: - parsemode, start, lvl = mode(MODE_REGULAR), 0, 0 - else: - raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])' - result = [] - end = len(buf) - if lvl == 0 and parsemode == mode(MODE_REGULAR): - lineno = 1 - lvl = lvl + 1 - - ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')' - - # - # some of the more regular modes... - # - - if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)): - cstate = [] - newpos = start - curpmode = parsemode - while 1: - where = newpos - #print '\tnew round: ' + epsilon(buf, where) - if where == end: - if lvl > 1 or curpmode != mode(MODE_REGULAR): - # not the way we started... - raise EOFError, 'premature end of file.' + lle(lvl, buf, where) - # the real ending of lvl-1 parse - return end, result - - pos = rc_regular.search(buf, where) - - if pos < 0: - pos = end - - if pos != where: - newpos, c = pos, chunk(PLAIN, where, (where, pos)) - result.append(c) - continue - - - # - # ok, pos == where and pos != end - # - foundchar = buf[where] - if foundchar in my_cc[CC_LBRACE]: - # recursive subgroup parse... - newpos, data = parseit(buf, curpmode, where+1, lvl) - result.append(chunk(GROUP, where, data)) - - elif foundchar in my_cc[CC_RBRACE]: - if lvl <= 1: - raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where) - if lvl == 1 and mode != mode(MODE_REGULAR): - raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)' - return where + 1, result - - elif foundchar in my_cc[CC_ESCAPE]: - # - # call the routine that actually deals with - # this problem. If do_ret is None, than - # return the value of do_ret - # - # Note that handle_cs might call this routine - # recursively again... - # - do_ret, newpos = handlecs(buf, where, \ - curpmode, lvl, result, end) - if do_ret != None: - return do_ret - - elif foundchar in my_cc[CC_COMMENT]: - newpos, data = parseit(buf, \ - mode(MODE_COMMENT), where+1, lvl) - result.append(chunk(COMMENT, where, data)) - - elif foundchar in my_cc[CC_MATHSHIFT]: - # note that recursive calls to math-mode - # scanning are called with recursion-level 0 - # again, in order to check for bad mathend - # - if where + 1 != end and \ - buf[where + 1] in \ - my_cc[CC_MATHSHIFT]: - # - # double mathshift, e.g. '$$' - # - if curpmode == mode(MODE_REGULAR): - newpos, data = parseit(buf, \ - mode(MODE_DMATH), \ - where+2, 0) - result.append(chunk(DMATH, \ - where, data)) - elif curpmode == mode(MODE_MATH): - raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) - elif lvl != 1: - raise error, 'bad mathend.' + \ - lle(lvl, buf, where) - else: - return where + 2, result - else: - # - # single math shift, e.g. '$' - # - if curpmode == mode(MODE_REGULAR): - newpos, data = parseit(buf, \ - mode(MODE_MATH), \ - where+1, 0) - result.append(chunk(MATH, \ - where, data)) - elif curpmode == mode(MODE_DMATH): - raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) - elif lvl != 1: - raise error, 'bad mathend.' + \ - lv(lvl, buf, where) - else: - return where + 1, result - - elif foundchar in my_cc[CC_IGNORE]: - print 'warning: ignored char', `foundchar` - newpos = where + 1 - - elif foundchar in my_cc[CC_ACTIVE]: - result.append(chunk(ACTIVE, where, foundchar)) - newpos = where + 1 - - elif foundchar in my_cc[CC_INVALID]: - raise error, 'invalid char ' + `foundchar` - newpos = where + 1 - - elif foundchar in my_cc[CC_ENDLINE]: - # - # after an end of line, eat the rest of - # whitespace on the beginning of the next line - # this is what LaTeX more or less does - # - # also, try to indicate double newlines (\par) - # - lineno = lineno + 1 - savedwhere = where - newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl) - if newpos != end and buf[newpos] in \ - my_cc[CC_ENDLINE]: - result.append(chunk(DENDLINE, \ - savedwhere, foundchar)) - else: - result.append(chunk(ENDLINE, \ - savedwhere, foundchar)) - else: - result.append(chunk(OTHER, where, foundchar)) - newpos = where + 1 - - elif parsemode == mode(MODE_CS_SCAN): + global lineno + + if len(rest) == 3: + parsemode, start, lvl = rest + elif len(rest) == 2: + parsemode, start, lvl = rest + (0, ) + elif len(rest) == 1: + parsemode, start, lvl = rest + (0, 0) + elif len(rest) == 0: + parsemode, start, lvl = mode(MODE_REGULAR), 0, 0 + else: + raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])' + result = [] + end = len(buf) + if lvl == 0 and parsemode == mode(MODE_REGULAR): + lineno = 1 + lvl = lvl + 1 + + ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')' + + # + # some of the more regular modes... + # + + if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)): + cstate = [] + newpos = start + curpmode = parsemode + while 1: + where = newpos + #print '\tnew round: ' + epsilon(buf, where) + if where == end: + if lvl > 1 or curpmode != mode(MODE_REGULAR): + # not the way we started... + raise EOFError, 'premature end of file.' + lle(lvl, buf, where) + # the real ending of lvl-1 parse + return end, result + + pos = rc_regular.search(buf, where) + + if pos < 0: + pos = end + + if pos != where: + newpos, c = pos, chunk(PLAIN, where, (where, pos)) + result.append(c) + continue + + + # + # ok, pos == where and pos != end + # + foundchar = buf[where] + if foundchar in my_cc[CC_LBRACE]: + # recursive subgroup parse... + newpos, data = parseit(buf, curpmode, where+1, lvl) + result.append(chunk(GROUP, where, data)) + + elif foundchar in my_cc[CC_RBRACE]: + if lvl <= 1: + raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where) + if lvl == 1 and mode != mode(MODE_REGULAR): + raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)' + return where + 1, result + + elif foundchar in my_cc[CC_ESCAPE]: # - # scan for a control sequence token. `\ape', `\nut' or `\%' + # call the routine that actually deals with + # this problem. If do_ret is None, than + # return the value of do_ret # - if start == end: - raise EOFError, 'can\'t find end of csname' - pos = rc_cs_scan.search(buf, start) - if pos < 0: - pos = end - if pos == start: - # first non-letter right where we started the search - # ---> the control sequence name consists of one single - # character. Also: don't eat white space... - if buf[pos] in my_cc[CC_ENDLINE]: - lineno = lineno + 1 - pos = pos + 1 - return pos, (start, pos) + # Note that handle_cs might call this routine + # recursively again... + # + do_ret, newpos = handlecs(buf, where, + curpmode, lvl, result, end) + if do_ret != None: + return do_ret + + elif foundchar in my_cc[CC_COMMENT]: + newpos, data = parseit(buf, + mode(MODE_COMMENT), where+1, lvl) + result.append(chunk(COMMENT, where, data)) + + elif foundchar in my_cc[CC_MATHSHIFT]: + # note that recursive calls to math-mode + # scanning are called with recursion-level 0 + # again, in order to check for bad mathend + # + if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]: + # + # double mathshift, e.g. '$$' + # + if curpmode == mode(MODE_REGULAR): + newpos, data = parseit(buf, + mode(MODE_DMATH), + where+2, 0) + result.append(chunk(DMATH, + where, data)) + elif curpmode == mode(MODE_MATH): + raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) + elif lvl != 1: + raise error, 'bad mathend.' + lle(lvl, buf, where) + else: + return where + 2, result else: - spos = pos - if buf[pos] == '\n': - lineno = lineno + 1 - spos = pos + 1 - pos2, dummy = parseit(buf, \ - mode(MODE_GOBBLEWHITE), spos, lvl) - return pos2, (start, pos) - - elif parsemode == mode(MODE_GOBBLEWHITE): - if start == end: - return start, '' - pos = rc_endwhite.search(buf, start) - if pos < 0: - pos = start - return pos, (start, pos) + # + # single math shift, e.g. '$' + # + if curpmode == mode(MODE_REGULAR): + newpos, data = parseit(buf, + mode(MODE_MATH), + where+1, 0) + result.append(chunk(MATH, + where, data)) + elif curpmode == mode(MODE_DMATH): + raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) + elif lvl != 1: + raise error, 'bad mathend.' + lv(lvl, buf, where) + else: + return where + 1, result + + elif foundchar in my_cc[CC_IGNORE]: + print 'warning: ignored char', `foundchar` + newpos = where + 1 + + elif foundchar in my_cc[CC_ACTIVE]: + result.append(chunk(ACTIVE, where, foundchar)) + newpos = where + 1 + + elif foundchar in my_cc[CC_INVALID]: + raise error, 'invalid char ' + `foundchar` + newpos = where + 1 + + elif foundchar in my_cc[CC_ENDLINE]: + # + # after an end of line, eat the rest of + # whitespace on the beginning of the next line + # this is what LaTeX more or less does + # + # also, try to indicate double newlines (\par) + # + lineno = lineno + 1 + savedwhere = where + newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl) + if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]: + result.append(chunk(DENDLINE, savedwhere, foundchar)) + else: + result.append(chunk(ENDLINE, savedwhere, foundchar)) + else: + result.append(chunk(OTHER, where, foundchar)) + newpos = where + 1 - elif parsemode == mode(MODE_COMMENT): - pos = rc_comment.search(buf, start) + elif parsemode == mode(MODE_CS_SCAN): + # + # scan for a control sequence token. `\ape', `\nut' or `\%' + # + if start == end: + raise EOFError, 'can\'t find end of csname' + pos = rc_cs_scan.search(buf, start) + if pos < 0: + pos = end + if pos == start: + # first non-letter right where we started the search + # ---> the control sequence name consists of one single + # character. Also: don't eat white space... + if buf[pos] in my_cc[CC_ENDLINE]: lineno = lineno + 1 - if pos < 0: - print 'no newline perhaps?' - raise EOFError, 'can\'t find end of comment' - pos = pos + 1 - pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl) - return pos2, (start, pos) + pos = pos + 1 + return pos, (start, pos) + else: + spos = pos + if buf[pos] == '\n': + lineno = lineno + 1 + spos = pos + 1 + pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), spos, lvl) + return pos2, (start, pos) + elif parsemode == mode(MODE_GOBBLEWHITE): + if start == end: + return start, '' + pos = rc_endwhite.search(buf, start) + if pos < 0: + pos = start + return pos, (start, pos) + + elif parsemode == mode(MODE_COMMENT): + pos = rc_comment.search(buf, start) + lineno = lineno + 1 + if pos < 0: + print 'no newline perhaps?' + raise EOFError, 'can\'t find end of comment' + pos = pos + 1 + pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl) + return pos2, (start, pos) - else: - raise error, 'Unknown mode (' + `parsemode` + ')' + + else: + raise error, 'Unknown mode (' + `parsemode` + ')' #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl) @@ -613,182 +605,182 @@ re_endverb = regex.compile(un_re(endverbstr)) # return with the data in return_data # def handlecs(buf, where, curpmode, lvl, result, end): - global lineno - - # get the control sequence name... - newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl) - saveddata = data - - if s(buf, data) in ('begin', 'end'): - # skip the expected '{' and get the LaTeX-envname '}' - newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl) - if len(data) != 1: - raise error, 'expected 1 chunk of data.' + \ - lle(lvl, buf, where) - - # yucky, we've got an environment - envname = s(buf, data[0].data) - ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl) - if s(buf, saveddata) == 'begin' and envname == 'verbatim': - # verbatim deserves special treatment - pos = re_endverb.search(buf, newpos) - if pos < 0: - raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where) - result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))]))) - newpos = pos + len(endverbstr) - - elif s(buf, saveddata) == 'begin': - # start parsing recursively... If that parse returns - # from an '\end{...}', then should the last item of - # the returned data be a string containing the ended - # environment - newpos, data = parseit(buf, curpmode, newpos, lvl) - if not data or type(data[-1]) is not StringType: - raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos) - retenv = data[-1] - del data[-1] - if retenv != envname: - #[`retenv`, `envname`] - raise error, 'environments do not match.' + \ - lle(lvl, buf, where) + \ - epsilon(buf, newpos) - result.append(chunk(ENV, where, (retenv, data))) - else: - # 'end'... append the environment name, as just - # pointed out, and order parsit to return... - result.append(envname) - ##print 'POINT of return: ' + epsilon(buf, newpos) - # the tuple will be returned by parseit - return (newpos, result), newpos - - # end of \begin ... \end handling - - elif s(buf, data)[0:2] == 'if': - # another scary monster: the 'if' directive - flag = s(buf, data)[2:] - - # recursively call parseit, just like environment above.. - # the last item of data should contain the if-termination - # e.g., 'else' of 'fi' - newpos, data = parseit(buf, curpmode, newpos, lvl) - if not data or data[-1] not in ('else', 'fi'): - raise error, 'wrong if... termination' + \ - lle(lvl, buf, where) + epsilon(buf, newpos) - - ifterm = data[-1] - del data[-1] - # 0 means dont_negate flag - result.append(chunk(IF, where, (flag, 0, data))) - if ifterm == 'else': - # do the whole thing again, there is only one way - # to end this one, by 'fi' - newpos, data = parseit(buf, curpmode, newpos, lvl) - if not data or data[-1] not in ('fi', ): - raise error, 'wrong if...else... termination' \ - + lle(lvl, buf, where) \ - + epsilon(buf, newpos) - - ifterm = data[-1] - del data[-1] - result.append(chunk(IF, where, (flag, 1, data))) - #done implicitely: return None, newpos - - elif s(buf, data) in ('else', 'fi'): - result.append(s(buf, data)) - # order calling party to return tuple - return (newpos, result), newpos - - # end of \if, \else, ... \fi handling - - elif s(buf, saveddata) == 'verb': - x2 = saveddata[1] - result.append(chunk(CSNAME, where, data)) - if x2 == end: - raise error, 'premature end of command.' + lle(lvl, buf, where) - delimchar = buf[x2] - ##print 'VERB: delimchar ' + `delimchar` - pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1) - if pos < 0: - raise error, 'end of \'verb\' argument (' + \ - `delimchar` + ') not found.' + \ - lle(lvl, buf, where) - result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))])) - newpos = pos + 1 + global lineno + + # get the control sequence name... + newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl) + saveddata = data + + if s(buf, data) in ('begin', 'end'): + # skip the expected '{' and get the LaTeX-envname '}' + newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl) + if len(data) != 1: + raise error, 'expected 1 chunk of data.' + \ + lle(lvl, buf, where) + + # yucky, we've got an environment + envname = s(buf, data[0].data) + ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl) + if s(buf, saveddata) == 'begin' and envname == 'verbatim': + # verbatim deserves special treatment + pos = re_endverb.search(buf, newpos) + if pos < 0: + raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where) + result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))]))) + newpos = pos + len(endverbstr) + + elif s(buf, saveddata) == 'begin': + # start parsing recursively... If that parse returns + # from an '\end{...}', then should the last item of + # the returned data be a string containing the ended + # environment + newpos, data = parseit(buf, curpmode, newpos, lvl) + if not data or type(data[-1]) is not StringType: + raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos) + retenv = data[-1] + del data[-1] + if retenv != envname: + #[`retenv`, `envname`] + raise error, 'environments do not match.' + \ + lle(lvl, buf, where) + \ + epsilon(buf, newpos) + result.append(chunk(ENV, where, (retenv, data))) else: - result.append(chunk(CSNAME, where, data)) - return None, newpos + # 'end'... append the environment name, as just + # pointed out, and order parsit to return... + result.append(envname) + ##print 'POINT of return: ' + epsilon(buf, newpos) + # the tuple will be returned by parseit + return (newpos, result), newpos + + # end of \begin ... \end handling + + elif s(buf, data)[0:2] == 'if': + # another scary monster: the 'if' directive + flag = s(buf, data)[2:] + + # recursively call parseit, just like environment above.. + # the last item of data should contain the if-termination + # e.g., 'else' of 'fi' + newpos, data = parseit(buf, curpmode, newpos, lvl) + if not data or data[-1] not in ('else', 'fi'): + raise error, 'wrong if... termination' + \ + lle(lvl, buf, where) + epsilon(buf, newpos) + + ifterm = data[-1] + del data[-1] + # 0 means dont_negate flag + result.append(chunk(IF, where, (flag, 0, data))) + if ifterm == 'else': + # do the whole thing again, there is only one way + # to end this one, by 'fi' + newpos, data = parseit(buf, curpmode, newpos, lvl) + if not data or data[-1] not in ('fi', ): + raise error, 'wrong if...else... termination' \ + + lle(lvl, buf, where) \ + + epsilon(buf, newpos) + + ifterm = data[-1] + del data[-1] + result.append(chunk(IF, where, (flag, 1, data))) + #done implicitely: return None, newpos + + elif s(buf, data) in ('else', 'fi'): + result.append(s(buf, data)) + # order calling party to return tuple + return (newpos, result), newpos + + # end of \if, \else, ... \fi handling + + elif s(buf, saveddata) == 'verb': + x2 = saveddata[1] + result.append(chunk(CSNAME, where, data)) + if x2 == end: + raise error, 'premature end of command.' + lle(lvl, buf, where) + delimchar = buf[x2] + ##print 'VERB: delimchar ' + `delimchar` + pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1) + if pos < 0: + raise error, 'end of \'verb\' argument (' + \ + `delimchar` + ') not found.' + \ + lle(lvl, buf, where) + result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))])) + newpos = pos + 1 + else: + result.append(chunk(CSNAME, where, data)) + return None, newpos # this is just a function to get the string value if the possible data-tuple def s(buf, data): - if type(data) is StringType: - return data - if len(data) != 2 or not (type(data[0]) is type(data[1]) is IntType): - raise TypeError, 'expected tuple of 2 integers' - x1, x2 = data - return buf[x1:x2] + if type(data) is StringType: + return data + if len(data) != 2 or not (type(data[0]) is type(data[1]) is IntType): + raise TypeError, 'expected tuple of 2 integers' + x1, x2 = data + return buf[x1:x2] ##length, data1, i = getnextarg(length, buf, pp, i + 1) # make a deep-copy of some chunks def crcopy(r): - return map(chunkcopy, r) + return map(chunkcopy, r) # copy a chunk, would better be a method of class Chunk... def chunkcopy(ch): - if ch.chtype == chunk_type(GROUP): - return chunk(GROUP, ch.where, map(chunkcopy, ch.data)) - else: - return chunk(ch.chtype, ch.where, ch.data) + if ch.chtype == chunk_type(GROUP): + return chunk(GROUP, ch.where, map(chunkcopy, ch.data)) + else: + return chunk(ch.chtype, ch.where, ch.data) # get next argument for TeX-macro, flatten a group (insert between) # or return Command Sequence token, or give back one character def getnextarg(length, buf, pp, item): - ##wobj = Wobj() - ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) - ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' - - while item < length and pp[item].chtype == chunk_type(ENDLINE): - del pp[item] - length = length - 1 - if item >= length: - raise error, 'no next arg.' + epsilon(buf, pp[-1].where) - if pp[item].chtype == chunk_type(GROUP): - newpp = pp[item].data - del pp[item] - length = length - 1 - changeit(buf, newpp) - length = length + len(newpp) - pp[item:item] = newpp - item = item + len(newpp) - if len(newpp) < 10: - wobj = Wobj() - dumpit(buf, wobj.write, newpp) - ##print 'GETNEXTARG: inserted ' + `wobj.data` - return length, item - elif pp[item].chtype == chunk_type(PLAIN): - #grab one char - print 'WARNING: grabbing one char' - if len(s(buf, pp[item].data)) > 1: - pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1])) - item, length = item+1, length+1 - pp[item].data = s(buf, pp[item].data)[1:] - else: - item = item+1 - return length, item + ##wobj = Wobj() + ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) + ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + + while item < length and pp[item].chtype == chunk_type(ENDLINE): + del pp[item] + length = length - 1 + if item >= length: + raise error, 'no next arg.' + epsilon(buf, pp[-1].where) + if pp[item].chtype == chunk_type(GROUP): + newpp = pp[item].data + del pp[item] + length = length - 1 + changeit(buf, newpp) + length = length + len(newpp) + pp[item:item] = newpp + item = item + len(newpp) + if len(newpp) < 10: + wobj = Wobj() + dumpit(buf, wobj.write, newpp) + ##print 'GETNEXTARG: inserted ' + `wobj.data` + return length, item + elif pp[item].chtype == chunk_type(PLAIN): + #grab one char + print 'WARNING: grabbing one char' + if len(s(buf, pp[item].data)) > 1: + pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1])) + item, length = item+1, length+1 + pp[item].data = s(buf, pp[item].data)[1:] else: - ch = pp[item] - try: - str = `s(buf, ch.data)` - except TypeError: - str = `ch.data` - if len(str) > 400: - str = str[:400] + '...' - print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str - return length, item + item = item+1 + return length, item + else: + ch = pp[item] + try: + str = `s(buf, ch.data)` + except TypeError: + str = `ch.data` + if len(str) > 400: + str = str[:400] + '...' + print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str + return length, item # this one is needed to find the end of LaTeX's optional argument, like @@ -798,52 +790,52 @@ re_endopt = regex.compile(']') # get a LaTeX-optional argument, you know, the square braces '[' and ']' def getoptarg(length, buf, pp, item): - wobj = Wobj() - dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) - ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + wobj = Wobj() + dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) + ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + + if item >= length or \ + pp[item].chtype != chunk_type(PLAIN) or \ + s(buf, pp[item].data)[0] != '[': + return length, item + + pp[item].data = s(buf, pp[item].data)[1:] + if len(pp[item].data) == 0: + del pp[item] + length = length-1 + + while 1: + if item == length: + raise error, 'No end of optional arg found' + if pp[item].chtype == chunk_type(PLAIN): + text = s(buf, pp[item].data) + pos = re_endopt.search(text) + if pos >= 0: + pp[item].data = text[:pos] + if pos == 0: + del pp[item] + length = length-1 + else: + item=item+1 + text = text[pos+1:] - if item >= length or \ - pp[item].chtype != chunk_type(PLAIN) or \ - s(buf, pp[item].data)[0] != '[': - return length, item + while text and text[0] in ' \t': + text = text[1:] - pp[item].data = s(buf, pp[item].data)[1:] - if len(pp[item].data) == 0: - del pp[item] - length = length-1 + if text: + pp.insert(item, chunk(PLAIN, 0, text)) + length = length + 1 + return length, item - while 1: - if item == length: - raise error, 'No end of optional arg found' - if pp[item].chtype == chunk_type(PLAIN): - text = s(buf, pp[item].data) - pos = re_endopt.search(text) - if pos >= 0: - pp[item].data = text[:pos] - if pos == 0: - del pp[item] - length = length-1 - else: - item=item+1 - text = text[pos+1:] - - while text and text[0] in ' \t': - text = text[1:] - - if text: - pp.insert(item, chunk(PLAIN, 0, text)) - length = length + 1 - return length, item - - item = item+1 + item = item+1 # Wobj just add write-requests to the ``data'' attribute class Wobj: - data = '' + data = '' - def write(self, data): - self.data = self.data + data + def write(self, data): + self.data = self.data + data # ignore these commands ignoredcommands = ('bcode', 'ecode') @@ -855,7 +847,7 @@ themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves inargsselves = (',', '[', ']', '(', ')') # this is how *I* would show the difference between emph and strong # code 1 means: fold to uppercase -markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \ +markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), 'strong': ('*', '*')} # recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT} @@ -868,95 +860,95 @@ for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', # try to remove macros and return flat text def flattext(buf, pp): - pp = crcopy(pp) - ##print '---> FLATTEXT ' + `pp` - wobj = Wobj() - - i, length = 0, len(pp) - while 1: - if len(pp) != length: - raise 'FATAL', 'inconsistent length' - if i >= length: - break - ch = pp[i] - i = i+1 - if ch.chtype == chunk_type(PLAIN): - pass - elif ch.chtype == chunk_type(CSNAME): - if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves: - ch.chtype = chunk_type(PLAIN) - elif s(buf, ch.data) == 'e': - ch.chtype = chunk_type(PLAIN) - ch.data = '\\' - elif len(s(buf, ch.data)) == 1 \ - and s(buf, ch.data) in onlylatexspecial: - ch.chtype = chunk_type(PLAIN) - # if it is followed by an empty group, - # remove that group, it was needed for - # a true space - if i < length \ - and pp[i].chtype==chunk_type(GROUP) \ - and len(pp[i].data) == 0: - del pp[i] - length = length-1 - - elif s(buf, ch.data) in markcmds.keys(): - length, newi = getnextarg(length, buf, pp, i) - str = flattext(buf, pp[i:newi]) - del pp[i:newi] - length = length - (newi - i) - ch.chtype = chunk_type(PLAIN) - markcmd = s(buf, ch.data) - x = markcmds[markcmd] - if type(x) == TupleType: - pre, after = x - str = pre+str+after - elif x == 1: - str = string.upper(str) - else: - raise 'FATAL', 'corrupt markcmds' - ch.data = str - else: - if s(buf, ch.data) not in ignoredcommands: - print 'WARNING: deleting command ' + `s(buf, ch.data)` - print 'PP' + `pp[i-1]` - del pp[i-1] - i, length = i-1, length-1 - elif ch.chtype == chunk_type(GROUP): - length, newi = getnextarg(length, buf, pp, i-1) - i = i-1 + pp = crcopy(pp) + ##print '---> FLATTEXT ' + `pp` + wobj = Wobj() + + i, length = 0, len(pp) + while 1: + if len(pp) != length: + raise 'FATAL', 'inconsistent length' + if i >= length: + break + ch = pp[i] + i = i+1 + if ch.chtype == chunk_type(PLAIN): + pass + elif ch.chtype == chunk_type(CSNAME): + if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves: + ch.chtype = chunk_type(PLAIN) + elif s(buf, ch.data) == 'e': + ch.chtype = chunk_type(PLAIN) + ch.data = '\\' + elif len(s(buf, ch.data)) == 1 \ + and s(buf, ch.data) in onlylatexspecial: + ch.chtype = chunk_type(PLAIN) + # if it is followed by an empty group, + # remove that group, it was needed for + # a true space + if i < length \ + and pp[i].chtype==chunk_type(GROUP) \ + and len(pp[i].data) == 0: + del pp[i] + length = length-1 + + elif s(buf, ch.data) in markcmds.keys(): + length, newi = getnextarg(length, buf, pp, i) + str = flattext(buf, pp[i:newi]) + del pp[i:newi] + length = length - (newi - i) + ch.chtype = chunk_type(PLAIN) + markcmd = s(buf, ch.data) + x = markcmds[markcmd] + if type(x) == TupleType: + pre, after = x + str = pre+str+after + elif x == 1: + str = string.upper(str) + else: + raise 'FATAL', 'corrupt markcmds' + ch.data = str + else: + if s(buf, ch.data) not in ignoredcommands: + print 'WARNING: deleting command ' + `s(buf, ch.data)` + print 'PP' + `pp[i-1]` + del pp[i-1] + i, length = i-1, length-1 + elif ch.chtype == chunk_type(GROUP): + length, newi = getnextarg(length, buf, pp, i-1) + i = i-1 ## str = flattext(buf, crcopy(pp[i-1:newi])) ## del pp[i:newi] ## length = length - (newi - i) ## ch.chtype = chunk_type(PLAIN) ## ch.data = str - else: - pass - - dumpit(buf, wobj.write, pp) - ##print 'FLATTEXT: RETURNING ' + `wobj.data` - return wobj.data + else: + pass + + dumpit(buf, wobj.write, pp) + ##print 'FLATTEXT: RETURNING ' + `wobj.data` + return wobj.data # try to generate node names (a bit shorter than the chapter title) # note that the \nodename command (see elsewhere) overules these efforts def invent_node_names(text): - words = string.split(text) - - ##print 'WORDS ' + `words` - - if len(words) == 2 \ - and string.lower(words[0]) == 'built-in' \ - and string.lower(words[1]) not in ('modules', 'functions'): - return words[1] - if len(words) == 3 and string.lower(words[1]) == 'module': - return words[2] - if len(words) == 3 and string.lower(words[1]) == 'object': - return string.join(words[0:2]) - if len(words) > 4 and string.lower(string.join(words[-4:])) == \ - 'methods and data attributes': - return string.join(words[:2]) - return text - + words = string.split(text) + + ##print 'WORDS ' + `words` + + if len(words) == 2 \ + and string.lower(words[0]) == 'built-in' \ + and string.lower(words[1]) not in ('modules', 'functions'): + return words[1] + if len(words) == 3 and string.lower(words[1]) == 'module': + return words[2] + if len(words) == 3 and string.lower(words[1]) == 'object': + return string.join(words[0:2]) + if len(words) > 4 and string.lower(string.join(words[-4:])) == \ + 'methods and data attributes': + return string.join(words[:2]) + return text + re_commas_etc = regex.compile('[,`\'@{}]') re_whitespace = regex.compile('[ \t]*') @@ -969,27 +961,27 @@ re_whitespace = regex.compile('[ \t]*') # Sometimes this is too much, maybe consider DENDLINE's as stop def next_command_p(length, buf, pp, i, cmdname): - while 1: - if i >= len(pp): - break - ch = pp[i] - i = i+1 - if ch.chtype == chunk_type(ENDLINE): - continue - if ch.chtype == chunk_type(DENDLINE): - continue - if ch.chtype == chunk_type(PLAIN): - if re_whitespace.search(s(buf, ch.data)) == 0 and \ - re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)): - continue - return -1 - if ch.chtype == chunk_type(CSNAME): - if s(buf, ch.data) == cmdname: - return i # _after_ the command - return -1 - return -1 - - + while 1: + if i >= len(pp): + break + ch = pp[i] + i = i+1 + if ch.chtype == chunk_type(ENDLINE): + continue + if ch.chtype == chunk_type(DENDLINE): + continue + if ch.chtype == chunk_type(PLAIN): + if re_whitespace.search(s(buf, ch.data)) == 0 and \ + re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)): + continue + return -1 + if ch.chtype == chunk_type(CSNAME): + if s(buf, ch.data) == cmdname: + return i # _after_ the command + return -1 + return -1 + + # things that are special to LaTeX, but not to texi.. onlylatexspecial = '_~^$#&%' @@ -999,17 +991,17 @@ hist = Struct() out = Struct() def startchange(): - global hist, out + global hist, out - hist.inenv = [] - hist.nodenames = [] - hist.cindex = [] - hist.inargs = 0 - hist.enumeratenesting, hist.itemizenesting = 0, 0 + hist.inenv = [] + hist.nodenames = [] + hist.cindex = [] + hist.inargs = 0 + hist.enumeratenesting, hist.itemizenesting = 0, 0 + + out.doublenodes = [] + out.doublecindeces = [] - out.doublenodes = [] - out.doublecindeces = [] - spacech = [chunk(PLAIN, 0, ' ')] commach = [chunk(PLAIN, 0, ', ')] @@ -1051,174 +1043,173 @@ enumeratesymbols = ['1', 'A', 'a'] ## or \funcline{NAME}{ARGS} ## def do_funcdesc(length, buf, pp, i): - startpoint = i-1 - ch = pp[startpoint] - wh = ch.where - length, newi = getnextarg(length, buf, pp, i) - funcname = chunk(GROUP, wh, pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - save = hist.inargs - hist.inargs = 1 - length, newi = getnextarg(length, buf, pp, i) - hist.inargs = save - del save - the_args = [chunk(PLAIN, wh, '()'[0])] + \ - pp[i:newi] + \ - [chunk(PLAIN, wh, '()'[1])] - del pp[i:newi] - length = length - (newi-i) - - idxsi = hist.indexsubitem # words - command = '' - cat_class = '' - if idxsi and idxsi[-1] in ('method', 'protocol'): - command = 'defmethod' - cat_class = string.join(idxsi[:-1]) - elif len(idxsi) == 2 and idxsi[1] == 'function': - command = 'deffn' - cat_class = string.join(idxsi) - elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: - command = 'deffn' - cat_class = 'function of ' + string.join(idxsi[1:]) - - if not command: - raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] - cslinearg.append(chunk(PLAIN, wh, ' ')) - cslinearg.append(funcname) - cslinearg.append(chunk(PLAIN, wh, ' ')) - l = len(cslinearg) - cslinearg[l:l] = the_args + startpoint = i-1 + ch = pp[startpoint] + wh = ch.where + length, newi = getnextarg(length, buf, pp, i) + funcname = chunk(GROUP, wh, pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + save = hist.inargs + hist.inargs = 1 + length, newi = getnextarg(length, buf, pp, i) + hist.inargs = save + del save + the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \ + [chunk(PLAIN, wh, '()'[1])] + del pp[i:newi] + length = length - (newi-i) + + idxsi = hist.indexsubitem # words + command = '' + cat_class = '' + if idxsi and idxsi[-1] in ('method', 'protocol'): + command = 'defmethod' + cat_class = string.join(idxsi[:-1]) + elif len(idxsi) == 2 and idxsi[1] == 'function': + command = 'deffn' + cat_class = string.join(idxsi) + elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: + command = 'deffn' + cat_class = 'function of ' + string.join(idxsi[1:]) + + if not command: + raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + cslinearg.append(chunk(PLAIN, wh, ' ')) + cslinearg.append(funcname) + cslinearg.append(chunk(PLAIN, wh, ' ')) + l = len(cslinearg) + cslinearg[l:l] = the_args + + pp.insert(i, chunk(GROUP, wh, cslinearg)) + i, length = i+1, length+1 + hist.command = command + return length, i - pp.insert(i, chunk(GROUP, wh, cslinearg)) - i, length = i+1, length+1 - hist.command = command - return length, i - ## this routine will be called on \begin{excdesc}{NAME} ## or \excline{NAME} ## def do_excdesc(length, buf, pp, i): - startpoint = i-1 - ch = pp[startpoint] - wh = ch.where - length, newi = getnextarg(length, buf, pp, i) - excname = chunk(GROUP, wh, pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - idxsi = hist.indexsubitem # words - command = '' - cat_class = '' - class_class = '' - if len(idxsi) == 2 and idxsi[1] == 'exception': - command = 'defvr' - cat_class = string.join(idxsi) - elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: - command = 'defcv' - cat_class = 'exception' - class_class = string.join(idxsi[1:]) - elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']: - command = 'defcv' - cat_class = 'exception' - class_class = string.join(idxsi[2:]) - - - if not command: - raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + startpoint = i-1 + ch = pp[startpoint] + wh = ch.where + length, newi = getnextarg(length, buf, pp, i) + excname = chunk(GROUP, wh, pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + idxsi = hist.indexsubitem # words + command = '' + cat_class = '' + class_class = '' + if len(idxsi) == 2 and idxsi[1] == 'exception': + command = 'defvr' + cat_class = string.join(idxsi) + elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: + command = 'defcv' + cat_class = 'exception' + class_class = string.join(idxsi[1:]) + elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']: + command = 'defcv' + cat_class = 'exception' + class_class = string.join(idxsi[2:]) + + + if not command: + raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + cslinearg.append(chunk(PLAIN, wh, ' ')) + if class_class: + cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) cslinearg.append(chunk(PLAIN, wh, ' ')) - if class_class: - cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) - cslinearg.append(chunk(PLAIN, wh, ' ')) - cslinearg.append(excname) + cslinearg.append(excname) - pp.insert(i, chunk(GROUP, wh, cslinearg)) - i, length = i+1, length+1 - hist.command = command - return length, i + pp.insert(i, chunk(GROUP, wh, cslinearg)) + i, length = i+1, length+1 + hist.command = command + return length, i ## same for datadesc or dataline... def do_datadesc(length, buf, pp, i): - startpoint = i-1 - ch = pp[startpoint] - wh = ch.where - length, newi = getnextarg(length, buf, pp, i) - dataname = chunk(GROUP, wh, pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - idxsi = hist.indexsubitem # words - command = '' - cat_class = '' - class_class = '' - if idxsi[-1] in ('attribute', 'option'): - command = 'defcv' - cat_class = idxsi[-1] - class_class = string.join(idxsi[:-1]) - elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: - command = 'defcv' - cat_class = 'data' - class_class = string.join(idxsi[1:]) - elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']: - command = 'defcv' - cat_class = 'data' - class_class = string.join(idxsi[2:]) - - - if not command: - raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + startpoint = i-1 + ch = pp[startpoint] + wh = ch.where + length, newi = getnextarg(length, buf, pp, i) + dataname = chunk(GROUP, wh, pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + idxsi = hist.indexsubitem # words + command = '' + cat_class = '' + class_class = '' + if idxsi[-1] in ('attribute', 'option'): + command = 'defcv' + cat_class = idxsi[-1] + class_class = string.join(idxsi[:-1]) + elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: + command = 'defcv' + cat_class = 'data' + class_class = string.join(idxsi[1:]) + elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']: + command = 'defcv' + cat_class = 'data' + class_class = string.join(idxsi[2:]) + + + if not command: + raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + cslinearg.append(chunk(PLAIN, wh, ' ')) + if class_class: + cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) cslinearg.append(chunk(PLAIN, wh, ' ')) - if class_class: - cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) - cslinearg.append(chunk(PLAIN, wh, ' ')) - cslinearg.append(dataname) + cslinearg.append(dataname) + + pp.insert(i, chunk(GROUP, wh, cslinearg)) + i, length = i+1, length+1 + hist.command = command + return length, i - pp.insert(i, chunk(GROUP, wh, cslinearg)) - i, length = i+1, length+1 - hist.command = command - return length, i - # regular indices: those that are not set in tt font by default.... regindices = ('cindex', ) # remove illegal characters from node names def rm_commas_etc(text): - result = '' - changed = 0 - while 1: - pos = re_commas_etc.search(text) - if pos >= 0: - changed = 1 - result = result + text[:pos] - text = text[pos+1:] - else: - result = result + text - break - if changed: - print 'Warning: nodename changhed to ' + `result` + result = '' + changed = 0 + while 1: + pos = re_commas_etc.search(text) + if pos >= 0: + changed = 1 + result = result + text[:pos] + text = text[pos+1:] + else: + result = result + text + break + if changed: + print 'Warning: nodename changhed to ' + `result` + + return result - return result - # boolean flags flags = {'texi': 1} - + ## ## changeit: the actual routine, that changes the contents of the parsed @@ -1226,932 +1217,927 @@ flags = {'texi': 1} ## def changeit(buf, pp): - global onlylatexspecial, hist, out + global onlylatexspecial, hist, out + + i, length = 0, len(pp) + while 1: + # sanity check: length should always equal len(pp) + if len(pp) != length: + raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)` + if i >= length: + break + ch = pp[i] + i = i + 1 + + if type(ch) is StringType: + #normally, only chunks are present in pp, + # but in some cases, some extra info + # has been inserted, e.g., the \end{...} clauses + raise 'FATAL', 'got string, probably too many ' + `end` - i, length = 0, len(pp) - while 1: - # sanity check: length should always equal len(pp) - if len(pp) != length: - raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)` - if i >= length: - break - ch = pp[i] - i = i + 1 + if ch.chtype == chunk_type(GROUP): + # check for {\em ...} constructs + if ch.data and \ + ch.data[0].chtype == chunk_type(CSNAME) and \ + s(buf, ch.data[0].data) in fontchanges.keys(): + k = s(buf, ch.data[0].data) + del ch.data[0] + pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k])) + length, i = length+1, i+1 + + # recursively parse the contents of the group + changeit(buf, ch.data) + + elif ch.chtype == chunk_type(IF): + # \if... + flag, negate, data = ch.data + ##print 'IF: flag, negate = ' + `flag, negate` + if flag not in flags.keys(): + raise error, 'unknown flag ' + `flag` + + value = flags[flag] + if negate: + value = (not value) + del pp[i-1] + length, i = length-1, i-1 + if value: + pp[i:i] = data + length = length + len(data) + + + elif ch.chtype == chunk_type(ENV): + # \begin{...} .... + envname, data = ch.data + + #push this environment name on stack + hist.inenv.insert(0, envname) + + #append an endenv chunk after grouped data + data.append(chunk(ENDENV, ch.where, envname)) + ##[`data`] + + #delete this object + del pp[i-1] + i, length = i-1, length-1 + + #insert found data + pp[i:i] = data + length = length + len(data) + + if envname == 'verbatim': + pp[i:i] = [chunk(CSLINE, ch.where, 'example'), + chunk(GROUP, ch.where, [])] + length, i = length+2, i+2 + + elif envname == 'itemize': + if hist.itemizenesting > len(itemizesymbols): + raise error, 'too deep itemize nesting' + ingroupch = [chunk(CSNAME, ch.where, + itemizesymbols[hist.itemizenesting])] + hist.itemizenesting = hist.itemizenesting + 1 + pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'), + chunk(GROUP, ch.where, ingroupch)] + length, i = length+2, i+2 + + elif envname == 'enumerate': + if hist.enumeratenesting > len(enumeratesymbols): + raise error, 'too deep enumerate nesting' + ingroupch = [chunk(PLAIN, ch.where, + enumeratesymbols[hist.enumeratenesting])] + hist.enumeratenesting = hist.enumeratenesting + 1 + pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'), + chunk(GROUP, ch.where, ingroupch)] + length, i = length+2, i+2 + + elif envname == 'description': + ingroupch = [chunk(CSNAME, ch.where, 'b')] + pp[i:i] = [chunk(CSLINE, ch.where, 'table'), + chunk(GROUP, ch.where, ingroupch)] + length, i = length+2, i+2 + + elif (envname == 'tableiii') or (envname == 'tableii'): + if (envname == 'tableii'): + ltable = 2 + else: + ltable = 3 + wh = ch.where + newcode = [] + + #delete tabular format description + # e.g., {|l|c|l|} + length, newi = getnextarg(length, buf, pp, i) + del pp[i:newi] + length = length - (newi-i) + + newcode.append(chunk(CSLINE, wh, 'table')) + ingroupch = [chunk(CSNAME, wh, 'asis')] + newcode.append(chunk(GROUP, wh, ingroupch)) + newcode.append(chunk(CSLINE, wh, 'item')) + + #get the name of macro for @item + # e.g., {code} + length, newi = getnextarg(length, buf, pp, i) + + if newi-i != 1: + raise error, 'Sorry, expected 1 chunk argument' + if pp[i].chtype != chunk_type(PLAIN): + raise error, 'Sorry, expected plain text argument' + hist.itemargmacro = s(buf, pp[i].data) + del pp[i:newi] + length = length - (newi-i) + + itembody = [] + for count in range(ltable): + length, newi = getnextarg(length, buf, pp, i) + emphgroup = [ + chunk(CSNAME, wh, 'emph'), + chunk(GROUP, 0, pp[i:newi])] + del pp[i:newi] + length = length - (newi-i) + if count == 0: + itemarg = emphgroup + elif count == ltable-1: + itembody = itembody + \ + [chunk(PLAIN, wh, ' --- ')] + emphgroup + else: + itembody = emphgroup + newcode.append(chunk(GROUP, wh, itemarg)) + newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')] + pp[i:i] = newcode + l = len(newcode) + length, i = length+l, i+l + del newcode, l + + if length != len(pp): + raise 'STILL, SOMETHING wrong', `i` + + + elif envname == 'funcdesc': + pp.insert(i, chunk(PLAIN, ch.where, '')) + i, length = i+1, length+1 + length, i = do_funcdesc(length, buf, pp, i) + + elif envname == 'excdesc': + pp.insert(i, chunk(PLAIN, ch.where, '')) + i, length = i+1, length+1 + length, i = do_excdesc(length, buf, pp, i) + + elif envname == 'datadesc': + pp.insert(i, chunk(PLAIN, ch.where, '')) + i, length = i+1, length+1 + length, i = do_datadesc(length, buf, pp, i) + + else: + print 'WARNING: don\'t know what to do with env ' + `envname` + + elif ch.chtype == chunk_type(ENDENV): + envname = ch.data + if envname != hist.inenv[0]: + raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]` + del hist.inenv[0] + del pp[i-1] + i, length = i-1, length-1 + + if envname == 'verbatim': + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'example')])] + i, length = i+2, length+2 + elif envname == 'itemize': + hist.itemizenesting = hist.itemizenesting - 1 + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'itemize')])] + i, length = i+2, length+2 + elif envname == 'enumerate': + hist.enumeratenesting = hist.enumeratenesting-1 + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'enumerate')])] + i, length = i+2, length+2 + elif envname == 'description': + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'table')])] + i, length = i+2, length+2 + elif (envname == 'tableiii') or (envname == 'tableii'): + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'table')])] + i, length = i+2, length + 2 + pp.insert(i, chunk(DENDLINE, ch.where, '\n')) + i, length = i+1, length+1 + + elif envname in ('funcdesc', 'excdesc', 'datadesc'): + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + else: + print 'WARNING: ending env ' + `envname` + 'has no actions' + + elif ch.chtype == chunk_type(CSNAME): + # control name transformations + if s(buf, ch.data) == 'optional': + pp[i-1].chtype = chunk_type (PLAIN) + pp[i-1].data = '[' + if (i < length) and \ + (pp[i].chtype == chunk_type(GROUP)): + cp=pp[i].data + pp[i:i+1]=cp + [ + chunk(PLAIN, ch.where, ']')] + length = length+len(cp) + elif s(buf, ch.data) in ignoredcommands: + del pp[i-1] + i, length = i-1, length-1 + elif s(buf, ch.data) == '@' and \ + i != length and \ + pp[i].chtype == chunk_type(PLAIN) and \ + s(buf, pp[i].data)[0] == '.': + # \@. --> \. --> @. + ch.data = '.' + del pp[i] + length = length-1 + elif s(buf, ch.data) == '\\': + # \\ --> \* --> @* + ch.data = '*' + elif len(s(buf, ch.data)) == 1 and \ + s(buf, ch.data) in onlylatexspecial: + ch.chtype = chunk_type(PLAIN) + # check if such a command is followed by + # an empty group: e.g., `\%{}'. If so, remove + # this empty group too + if i < length and \ + pp[i].chtype == chunk_type(GROUP) \ + and len(pp[i].data) == 0: + del pp[i] + length = length-1 + + elif hist.inargs and s(buf, ch.data) in inargsselves: + # This is the special processing of the + # arguments of the \begin{funcdesc}... or + # \funcline... arguments + # \, --> , \[ --> [, \] --> ] + ch.chtype = chunk_type(PLAIN) + + elif s(buf, ch.data) == 'renewcommand': + # \renewcommand{\indexsubitem}.... + i, length = i-1, length-1 + del pp[i] + length, newi = getnextarg(length, buf, pp, i) + if newi-i == 1 \ + and i < length \ + and pp[i].chtype == chunk_type(CSNAME) \ + and s(buf, pp[i].data) == 'indexsubitem': + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + text = flattext(buf, pp[i:newi]) + if text[:1] != '(' or text[-1:] != ')': + raise error, 'expected indexsubitme enclosed in braces' + words = string.split(text[1:-1]) + hist.indexsubitem = words + del text, words + else: + print 'WARNING: renewcommand with unsupported arg removed' + del pp[i:newi] + length = length - (newi-i) + + elif s(buf, ch.data) == 'item': + ch.chtype = chunk_type(CSLINE) + length, newi = getoptarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + i, length = i+1, length+1 + + elif s(buf, ch.data) == 'ttindex': + idxsi = hist.indexsubitem + + cat_class = '' + if len(idxsi) >= 2 and idxsi[1] in \ + ('method', 'function', 'protocol'): + command = 'findex' + elif len(idxsi) >= 2 and idxsi[1] in \ + ('exception', 'object'): + command = 'vindex' + else: + print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command' + command = 'cindex' - if type(ch) is StringType: - #normally, only chunks are present in pp, - # but in some cases, some extra info - # has been inserted, e.g., the \end{...} clauses - raise 'FATAL', 'got string, probably too many ' + `end` - - if ch.chtype == chunk_type(GROUP): - # check for {\em ...} constructs - if ch.data and \ - ch.data[0].chtype == chunk_type(CSNAME) and \ - s(buf, ch.data[0].data) in fontchanges.keys(): - k = s(buf, ch.data[0].data) - del ch.data[0] - pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k])) - length, i = length+1, i+1 - - # recursively parse the contents of the group - changeit(buf, ch.data) - - elif ch.chtype == chunk_type(IF): - # \if... - flag, negate, data = ch.data - ##print 'IF: flag, negate = ' + `flag, negate` - if flag not in flags.keys(): - raise error, 'unknown flag ' + `flag` - - value = flags[flag] - if negate: - value = (not value) - del pp[i-1] - length, i = length-1, i-1 - if value: - pp[i:i] = data - length = length + len(data) - - - elif ch.chtype == chunk_type(ENV): - # \begin{...} .... - envname, data = ch.data - - #push this environment name on stack - hist.inenv.insert(0, envname) - - #append an endenv chunk after grouped data - data.append(chunk(ENDENV, ch.where, envname)) - ##[`data`] - - #delete this object - del pp[i-1] - i, length = i-1, length-1 - - #insert found data - pp[i:i] = data - length = length + len(data) - - if envname == 'verbatim': - pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \ - chunk(GROUP, ch.where, [])] - length, i = length+2, i+2 - - elif envname == 'itemize': - if hist.itemizenesting > len(itemizesymbols): - raise error, 'too deep itemize nesting' - ingroupch = [chunk(CSNAME, ch.where,\ - itemizesymbols[hist.itemizenesting])] - hist.itemizenesting = hist.itemizenesting + 1 - pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\ - chunk(GROUP, ch.where, ingroupch)] - length, i = length+2, i+2 - - elif envname == 'enumerate': - if hist.enumeratenesting > len(enumeratesymbols): - raise error, 'too deep enumerate nesting' - ingroupch = [chunk(PLAIN, ch.where,\ - enumeratesymbols[hist.enumeratenesting])] - hist.enumeratenesting = hist.enumeratenesting + 1 - pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\ - chunk(GROUP, ch.where, ingroupch)] - length, i = length+2, i+2 - - elif envname == 'description': - ingroupch = [chunk(CSNAME, ch.where, 'b')] - pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \ - chunk(GROUP, ch.where, ingroupch)] - length, i = length+2, i+2 - - elif (envname == 'tableiii') or \ - (envname == 'tableii'): - if (envname == 'tableii'): - ltable = 2 - else: - ltable = 3 - wh = ch.where - newcode = [] - - #delete tabular format description - # e.g., {|l|c|l|} - length, newi = getnextarg(length, buf, pp, i) - del pp[i:newi] - length = length - (newi-i) - - newcode.append(chunk(CSLINE, wh, 'table')) - ingroupch = [chunk(CSNAME, wh, 'asis')] - newcode.append(chunk(GROUP, wh, ingroupch)) - newcode.append(chunk(CSLINE, wh, 'item')) - - #get the name of macro for @item - # e.g., {code} - length, newi = getnextarg(length, buf, pp, i) - - if newi-i != 1: - raise error, 'Sorry, expected 1 chunk argument' - if pp[i].chtype != chunk_type(PLAIN): - raise error, 'Sorry, expected plain text argument' - hist.itemargmacro = s(buf, pp[i].data) - del pp[i:newi] - length = length - (newi-i) - - itembody = [] - for count in range(ltable): - length, newi = getnextarg(length, buf, pp, i) - emphgroup = [\ - chunk(CSNAME, wh, 'emph'), \ - chunk(GROUP, 0, pp[i:newi])] - del pp[i:newi] - length = length - (newi-i) - if count == 0: - itemarg = emphgroup - elif count == ltable-1: - itembody = itembody + \ - [chunk(PLAIN, wh, ' --- ')] + \ - emphgroup - else: - itembody = emphgroup - newcode.append(chunk(GROUP, wh, itemarg)) - newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')] - pp[i:i] = newcode - l = len(newcode) - length, i = length+l, i+l - del newcode, l - - if length != len(pp): - raise 'STILL, SOMETHING wrong', `i` - - - elif envname == 'funcdesc': - pp.insert(i, chunk(PLAIN, ch.where, '')) - i, length = i+1, length+1 - length, i = do_funcdesc(length, buf, pp, i) - - elif envname == 'excdesc': - pp.insert(i, chunk(PLAIN, ch.where, '')) - i, length = i+1, length+1 - length, i = do_excdesc(length, buf, pp, i) - - elif envname == 'datadesc': - pp.insert(i, chunk(PLAIN, ch.where, '')) - i, length = i+1, length+1 - length, i = do_datadesc(length, buf, pp, i) - - else: - print 'WARNING: don\'t know what to do with env ' + `envname` - - elif ch.chtype == chunk_type(ENDENV): - envname = ch.data - if envname != hist.inenv[0]: - raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]` - del hist.inenv[0] - del pp[i-1] - i, length = i-1, length-1 - - if envname == 'verbatim': - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'example')])] - i, length = i+2, length+2 - elif envname == 'itemize': - hist.itemizenesting = hist.itemizenesting - 1 - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'itemize')])] - i, length = i+2, length+2 - elif envname == 'enumerate': - hist.enumeratenesting = hist.enumeratenesting-1 - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'enumerate')])] - i, length = i+2, length+2 - elif envname == 'description': - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'table')])] - i, length = i+2, length+2 - elif (envname == 'tableiii') or (envname == 'tableii'): - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'table')])] - i, length = i+2, length + 2 - pp.insert(i, chunk(DENDLINE, ch.where, '\n')) - i, length = i+1, length+1 - - elif envname in ('funcdesc', 'excdesc', 'datadesc'): - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - else: - print 'WARNING: ending env ' + `envname` + 'has no actions' - - elif ch.chtype == chunk_type(CSNAME): - # control name transformations - if s(buf, ch.data) == 'optional': - pp[i-1].chtype = chunk_type (PLAIN) - pp[i-1].data = '[' - if (i < length) and \ - (pp[i].chtype == chunk_type(GROUP)): - cp=pp[i].data - pp[i:i+1]=cp + [\ - chunk(PLAIN, ch.where, ']')] - length = length+len(cp) - elif s(buf, ch.data) in ignoredcommands: - del pp[i-1] - i, length = i-1, length-1 - elif s(buf, ch.data) == '@' and \ - i != length and \ - pp[i].chtype == chunk_type(PLAIN) and \ - s(buf, pp[i].data)[0] == '.': - # \@. --> \. --> @. - ch.data = '.' - del pp[i] - length = length-1 - elif s(buf, ch.data) == '\\': - # \\ --> \* --> @* - ch.data = '*' - elif len(s(buf, ch.data)) == 1 and \ - s(buf, ch.data) in onlylatexspecial: - ch.chtype = chunk_type(PLAIN) - # check if such a command is followed by - # an empty group: e.g., `\%{}'. If so, remove - # this empty group too - if i < length and \ - pp[i].chtype == chunk_type(GROUP) \ - and len(pp[i].data) == 0: - del pp[i] - length = length-1 - - elif hist.inargs and s(buf, ch.data) in inargsselves: - # This is the special processing of the - # arguments of the \begin{funcdesc}... or - # \funcline... arguments - # \, --> , \[ --> [, \] --> ] - ch.chtype = chunk_type(PLAIN) - - elif s(buf, ch.data) == 'renewcommand': - # \renewcommand{\indexsubitem}.... - i, length = i-1, length-1 - del pp[i] - length, newi = getnextarg(length, buf, pp, i) - if newi-i == 1 \ - and i < length \ - and pp[i].chtype == chunk_type(CSNAME) \ - and s(buf, pp[i].data) == 'indexsubitem': - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - text = flattext(buf, pp[i:newi]) - if text[:1] != '(' or text[-1:] != ')': - raise error, 'expected indexsubitme enclosed in braces' - words = string.split(text[1:-1]) - hist.indexsubitem = words - del text, words - else: - print 'WARNING: renewcommand with unsupported arg removed' - del pp[i:newi] - length = length - (newi-i) - - elif s(buf, ch.data) == 'item': - ch.chtype = chunk_type(CSLINE) - length, newi = getoptarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - i, length = i+1, length+1 - - elif s(buf, ch.data) == 'ttindex': - idxsi = hist.indexsubitem - - cat_class = '' - if len(idxsi) >= 2 and idxsi[1] in \ - ('method', 'function', 'protocol'): - command = 'findex' - elif len(idxsi) >= 2 and idxsi[1] in \ - ('exception', 'object'): - command = 'vindex' - else: - print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command' - command = 'cindex' - - if not cat_class: - cat_class = '('+string.join(idxsi)+')' - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - length, newi = getnextarg(length, buf, pp, i) - arg = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - cat_arg = [chunk(PLAIN, ch.where, cat_class)] - - # determine what should be set in roman, and - # what in tt-font - if command in regindices: - - arg = [chunk(CSNAME, ch.where, 't'), \ - chunk(GROUP, ch.where, arg)] - else: - cat_arg = [chunk(CSNAME, ch.where, 'r'), \ - chunk(GROUP, ch.where, cat_arg)] - - ingroupch = arg + \ - [chunk(PLAIN, ch.where, ' ')] + \ - cat_arg - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'ldots': - # \ldots --> \dots{} --> @dots{} - ch.data = 'dots' - if i == length \ - or pp[i].chtype != chunk_type(GROUP) \ - or pp[i].data != []: - pp.insert(i, chunk(GROUP, ch.where, [])) - i, length = i+1, length+1 - elif s(buf, ch.data) in wordsselves: - # \UNIX --> UNIX - ch.chtype = chunk_type(PLAIN) - if i != length \ - and pp[i].chtype == chunk_type(GROUP) \ - and pp[i].data == []: - del pp[i] - length = length-1 - elif s(buf, ch.data) in for_texi: - pass - - elif s(buf, ch.data) == 'e': - # "\e" --> "\" - ch.data = '\\' - ch.chtype = chunk_type(PLAIN) - elif (s(buf, ch.data) == 'lineiii') or\ - (s(buf, ch.data) == 'lineii'): - # This is the most tricky one - # \lineiii{a1}{a2}[{a3}] --> - # @item @<cts. of itemargmacro>{a1} - # a2 [ -- a3] - # - ##print 'LINEIIIIII!!!!!!!' + if not cat_class: + cat_class = '('+string.join(idxsi)+')' + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + length, newi = getnextarg(length, buf, pp, i) + arg = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + cat_arg = [chunk(PLAIN, ch.where, cat_class)] + + # determine what should be set in roman, and + # what in tt-font + if command in regindices: + + arg = [chunk(CSNAME, ch.where, 't'), + chunk(GROUP, ch.where, arg)] + else: + cat_arg = [chunk(CSNAME, ch.where, 'r'), + chunk(GROUP, ch.where, cat_arg)] + + ingroupch = arg + \ + [chunk(PLAIN, ch.where, ' ')] + \ + cat_arg + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'ldots': + # \ldots --> \dots{} --> @dots{} + ch.data = 'dots' + if i == length \ + or pp[i].chtype != chunk_type(GROUP) \ + or pp[i].data != []: + pp.insert(i, chunk(GROUP, ch.where, [])) + i, length = i+1, length+1 + elif s(buf, ch.data) in wordsselves: + # \UNIX --> UNIX + ch.chtype = chunk_type(PLAIN) + if i != length \ + and pp[i].chtype == chunk_type(GROUP) \ + and pp[i].data == []: + del pp[i] + length = length-1 + elif s(buf, ch.data) in for_texi: + pass + + elif s(buf, ch.data) == 'e': + # "\e" --> "\" + ch.data = '\\' + ch.chtype = chunk_type(PLAIN) + elif (s(buf, ch.data) == 'lineiii') or\ + (s(buf, ch.data) == 'lineii'): + # This is the most tricky one + # \lineiii{a1}{a2}[{a3}] --> + # @item @<cts. of itemargmacro>{a1} + # a2 [ -- a3] + # + ##print 'LINEIIIIII!!!!!!!' ## wobj = Wobj() ## dumpit(buf, wobj.write, pp[i-1:i+5]) ## print '--->' + wobj.data + '<----' - if not hist.inenv: - raise error, \ - 'no environment for lineiii' - if (hist.inenv[0] != 'tableiii') and\ - (hist.inenv[0] != 'tableii'): - raise error, \ - 'wrong command (' + \ - s(buf, ch.data)+ \ - ') in wrong environment (' \ - + `hist.inenv[0]` + ')' - ch.chtype = chunk_type(CSLINE) - ch.data = 'item' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = [chunk(CSNAME, 0, \ - hist.itemargmacro), \ - chunk(GROUP, 0, pp[i:newi])] - del pp[i:newi] - length = length - (newi-i) + if not hist.inenv: + raise error, 'no environment for lineiii' + if (hist.inenv[0] != 'tableiii') and \ + (hist.inenv[0] != 'tableii'): + raise error, \ + 'wrong command (' + \ + s(buf, ch.data)+ \ + ') in wrong environment (' \ + + `hist.inenv[0]` + ')' + ch.chtype = chunk_type(CSLINE) + ch.data = 'item' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = [chunk(CSNAME, 0, + hist.itemargmacro), + chunk(GROUP, 0, pp[i:newi])] + del pp[i:newi] + length = length - (newi-i) ## print 'ITEM ARG: --->', ## wobj = Wobj() ## dumpit(buf, wobj.write, ingroupch) ## print wobj.data, '<---' - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - grouppos = i - i, length = i+1, length+1 - length, i = getnextarg(length, buf, pp, i) - length, newi = getnextarg(length, buf, pp, i) - if newi > i: - # we have a 3rd arg - pp.insert(i, chunk(PLAIN, ch.where, ' --- ')) - i = newi + 1 - length = length + 1 + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + grouppos = i + i, length = i+1, length+1 + length, i = getnextarg(length, buf, pp, i) + length, newi = getnextarg(length, buf, pp, i) + if newi > i: + # we have a 3rd arg + pp.insert(i, chunk(PLAIN, ch.where, ' --- ')) + i = newi + 1 + length = length + 1 ## pp[grouppos].data = pp[grouppos].data \ ## + [chunk(PLAIN, ch.where, ' ')] \ ## + pp[i:newi] ## del pp[i:newi] ## length = length - (newi-i) - if length != len(pp): - raise 'IN LINEIII IS THE ERR', `i` - - elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'): - #\xxxsection{A} ----> - # @node A, , , - # @xxxsection A - ## also: remove commas and quotes - ch.chtype = chunk_type(CSLINE) - length, newi = getnextarg(length, buf, pp, i) - afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') - if afternodenamecmd < 0: - cp1 = crcopy(pp[i:newi]) - pp[i:newi] = [\ - chunk(GROUP, ch.where, \ - pp[i:newi])] - length, newi = length - (newi-i) + 1, \ - i+1 - text = flattext(buf, cp1) - text = invent_node_names(text) - else: - length, endarg = getnextarg(length, buf, pp, afternodenamecmd) - cp1 = crcopy(pp[afternodenamecmd:endarg]) - del pp[newi:endarg] - length = length - (endarg-newi) - - pp[i:newi] = [\ - chunk(GROUP, ch.where, \ - pp[i:newi])] - length, newi = length - (newi-i) + 1, \ - i + 1 - text = flattext(buf, cp1) - if text[-1] == '.': - text = text[:-1] + if length != len(pp): + raise 'IN LINEIII IS THE ERR', `i` + + elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'): + #\xxxsection{A} ----> + # @node A, , , + # @xxxsection A + ## also: remove commas and quotes + ch.chtype = chunk_type(CSLINE) + length, newi = getnextarg(length, buf, pp, i) + afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') + if afternodenamecmd < 0: + cp1 = crcopy(pp[i:newi]) + pp[i:newi] = [ + chunk(GROUP, ch.where, + pp[i:newi])] + length, newi = length - (newi-i) + 1, i+1 + text = flattext(buf, cp1) + text = invent_node_names(text) + else: + length, endarg = getnextarg(length, buf, pp, afternodenamecmd) + cp1 = crcopy(pp[afternodenamecmd:endarg]) + del pp[newi:endarg] + length = length - (endarg-newi) + + pp[i:newi] = [ + chunk(GROUP, ch.where, + pp[i:newi])] + length, newi = length - (newi-i) + 1, i + 1 + text = flattext(buf, cp1) + if text[-1] == '.': + text = text[:-1] ## print 'FLATTEXT:', `text` - if text in hist.nodenames: - print 'WARNING: node name ' + `text` + ' already used' - out.doublenodes.append(text) - else: - hist.nodenames.append(text) - text = rm_commas_etc(text) - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'node'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, text+', , ,')\ - ])] - i, length = newi+2, length+2 - - elif s(buf,ch.data) == 'funcline': - # fold it to a very short environment - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - length, i = do_funcdesc(length, buf, pp, i) - - elif s(buf,ch.data) == 'dataline': - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - length, i = do_datadesc(length, buf, pp, i) - - elif s(buf,ch.data) == 'excline': - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - length, i = do_excdesc(length, buf, pp, i) - - - elif s(buf, ch.data) == 'index': - #\index{A} ---> - # @cindex A - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - length, newi = getnextarg(length, buf, pp, i) - - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - elif s(buf, ch.data) == 'bifuncindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'findex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(built-in function)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'obindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'findex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(object)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'opindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'findex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(operator)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'bimodindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'pindex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(built-in)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - elif s(buf, ch.data) == 'sectcode': - ch.data = 'code' - - - elif s(buf, ch.data) == 'stmodindex': - ch.chtype = chunk_type(CSLINE) - # use the program index as module index - ch.data = 'pindex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(standard)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'stindex': - # XXX must actually go to newindex st - wh = ch.where - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = [chunk(CSNAME, wh, 'code'), \ - chunk(GROUP, wh, pp[i:newi])] - - del pp[i:newi] - length = length - (newi-i) - - t = ingroupch[:] - t.append(chunk(PLAIN, wh, ' statement')) - - pp.insert(i, chunk(GROUP, wh, t)) - i, length = i+1, length+1 - - pp.insert(i, chunk(CSLINE, wh, 'cindex')) - i, length = i+1, length+1 - - t = ingroupch[:] - t.insert(0, chunk(PLAIN, wh, 'statement, ')) - - pp.insert(i, chunk(GROUP, wh, t)) - i, length = i+1, length+1 - - - elif s(buf, ch.data) == 'indexii': - #\indexii{A}{B} ---> - # @cindex A B - # @cindex B, A - length, newi = getnextarg(length, buf, pp, i) - cp11 = pp[i:newi] - cp21 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp12 = pp[i:newi] - cp22 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ - chunk(PLAIN, ch.where, ' ')] + cp12)) - i, length = i+1, length+1 - pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ - chunk(GROUP, ch.where, cp22 + [\ - chunk(PLAIN, ch.where, ', ')]+ cp21)] - i, length = i+2, length+2 - - elif s(buf, ch.data) == 'indexiii': - length, newi = getnextarg(length, buf, pp, i) - cp11 = pp[i:newi] - cp21 = crcopy(pp[i:newi]) - cp31 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp12 = pp[i:newi] - cp22 = crcopy(pp[i:newi]) - cp32 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp13 = pp[i:newi] - cp23 = crcopy(pp[i:newi]) - cp33 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ - chunk(PLAIN, ch.where, ' ')] + cp12 \ - + [chunk(PLAIN, ch.where, ' ')] \ - + cp13)) - i, length = i+1, length+1 - pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ - chunk(GROUP, ch.where, cp22 + [\ - chunk(PLAIN, ch.where, ' ')]+ cp23\ - + [chunk(PLAIN, ch.where, ', ')] +\ - cp21)] - i, length = i+2, length+2 - pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ - chunk(GROUP, ch.where, cp33 + [\ - chunk(PLAIN, ch.where, ', ')]+ cp31\ - + [chunk(PLAIN, ch.where, ' ')] +\ - cp32)] - i, length = i+2, length+2 - - - elif s(buf, ch.data) == 'indexiv': - length, newi = getnextarg(length, buf, pp, i) - cp11 = pp[i:newi] - cp21 = crcopy(pp[i:newi]) - cp31 = crcopy(pp[i:newi]) - cp41 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp12 = pp[i:newi] - cp22 = crcopy(pp[i:newi]) - cp32 = crcopy(pp[i:newi]) - cp42 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp13 = pp[i:newi] - cp23 = crcopy(pp[i:newi]) - cp33 = crcopy(pp[i:newi]) - cp43 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp14 = pp[i:newi] - cp24 = crcopy(pp[i:newi]) - cp34 = crcopy(pp[i:newi]) - cp44 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - ingroupch = cp11 + \ - spacech + cp12 + \ - spacech + cp13 + \ - spacech + cp14 - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - i, length = i+1, length+1 - ingroupch = cp22 + \ - spacech + cp23 + \ - spacech + cp24 + \ - commach + cp21 - pp[i:i] = cindexch + [\ - chunk(GROUP, ch.where, ingroupch)] - i, length = i+2, length+2 - ingroupch = cp33 + \ - spacech + cp34 + \ - commach + cp31 + \ - spacech + cp32 - pp[i:i] = cindexch + [\ - chunk(GROUP, ch.where, ingroupch)] - i, length = i+2, length+2 - ingroupch = cp44 + \ - commach + cp41 + \ - spacech + cp42 + \ - spacech + cp43 - pp[i:i] = cindexch + [\ - chunk(GROUP, ch.where, ingroupch)] - i, length = i+2, length+2 - - - - else: - print 'don\'t know what to do with keyword ' + `s(buf, ch.data)` - - - + if text in hist.nodenames: + print 'WARNING: node name ' + `text` + ' already used' + out.doublenodes.append(text) + else: + hist.nodenames.append(text) + text = rm_commas_etc(text) + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'node'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, text+', , ,') + ])] + i, length = newi+2, length+2 + + elif s(buf,ch.data) == 'funcline': + # fold it to a very short environment + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + length, i = do_funcdesc(length, buf, pp, i) + + elif s(buf,ch.data) == 'dataline': + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + length, i = do_datadesc(length, buf, pp, i) + + elif s(buf,ch.data) == 'excline': + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + length, i = do_excdesc(length, buf, pp, i) + + + elif s(buf, ch.data) == 'index': + #\index{A} ---> + # @cindex A + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + length, newi = getnextarg(length, buf, pp, i) + + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + elif s(buf, ch.data) == 'bifuncindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'findex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(built-in function)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'obindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'findex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(object)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'opindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'findex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(operator)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'bimodindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'pindex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(built-in)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + elif s(buf, ch.data) == 'sectcode': + ch.data = 'code' + + + elif s(buf, ch.data) == 'stmodindex': + ch.chtype = chunk_type(CSLINE) + # use the program index as module index + ch.data = 'pindex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(standard)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'stindex': + # XXX must actually go to newindex st + wh = ch.where + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = [chunk(CSNAME, wh, 'code'), + chunk(GROUP, wh, pp[i:newi])] + + del pp[i:newi] + length = length - (newi-i) + + t = ingroupch[:] + t.append(chunk(PLAIN, wh, ' statement')) + + pp.insert(i, chunk(GROUP, wh, t)) + i, length = i+1, length+1 + + pp.insert(i, chunk(CSLINE, wh, 'cindex')) + i, length = i+1, length+1 + + t = ingroupch[:] + t.insert(0, chunk(PLAIN, wh, 'statement, ')) + + pp.insert(i, chunk(GROUP, wh, t)) + i, length = i+1, length+1 + + + elif s(buf, ch.data) == 'indexii': + #\indexii{A}{B} ---> + # @cindex A B + # @cindex B, A + length, newi = getnextarg(length, buf, pp, i) + cp11 = pp[i:newi] + cp21 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp12 = pp[i:newi] + cp22 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + pp.insert(i, chunk(GROUP, ch.where, cp11 + [ + chunk(PLAIN, ch.where, ' ')] + cp12)) + i, length = i+1, length+1 + pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), + chunk(GROUP, ch.where, cp22 + [ + chunk(PLAIN, ch.where, ', ')]+ cp21)] + i, length = i+2, length+2 + + elif s(buf, ch.data) == 'indexiii': + length, newi = getnextarg(length, buf, pp, i) + cp11 = pp[i:newi] + cp21 = crcopy(pp[i:newi]) + cp31 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp12 = pp[i:newi] + cp22 = crcopy(pp[i:newi]) + cp32 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp13 = pp[i:newi] + cp23 = crcopy(pp[i:newi]) + cp33 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + pp.insert(i, chunk(GROUP, ch.where, cp11 + [ + chunk(PLAIN, ch.where, ' ')] + cp12 + + [chunk(PLAIN, ch.where, ' ')] + + cp13)) + i, length = i+1, length+1 + pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), + chunk(GROUP, ch.where, cp22 + [ + chunk(PLAIN, ch.where, ' ')]+ cp23 + + [chunk(PLAIN, ch.where, ', ')] + + cp21)] + i, length = i+2, length+2 + pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), + chunk(GROUP, ch.where, cp33 + [ + chunk(PLAIN, ch.where, ', ')]+ cp31 + + [chunk(PLAIN, ch.where, ' ')] + + cp32)] + i, length = i+2, length+2 + + + elif s(buf, ch.data) == 'indexiv': + length, newi = getnextarg(length, buf, pp, i) + cp11 = pp[i:newi] + cp21 = crcopy(pp[i:newi]) + cp31 = crcopy(pp[i:newi]) + cp41 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp12 = pp[i:newi] + cp22 = crcopy(pp[i:newi]) + cp32 = crcopy(pp[i:newi]) + cp42 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp13 = pp[i:newi] + cp23 = crcopy(pp[i:newi]) + cp33 = crcopy(pp[i:newi]) + cp43 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp14 = pp[i:newi] + cp24 = crcopy(pp[i:newi]) + cp34 = crcopy(pp[i:newi]) + cp44 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + ingroupch = cp11 + \ + spacech + cp12 + \ + spacech + cp13 + \ + spacech + cp14 + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + i, length = i+1, length+1 + ingroupch = cp22 + \ + spacech + cp23 + \ + spacech + cp24 + \ + commach + cp21 + pp[i:i] = cindexch + [ + chunk(GROUP, ch.where, ingroupch)] + i, length = i+2, length+2 + ingroupch = cp33 + \ + spacech + cp34 + \ + commach + cp31 + \ + spacech + cp32 + pp[i:i] = cindexch + [ + chunk(GROUP, ch.where, ingroupch)] + i, length = i+2, length+2 + ingroupch = cp44 + \ + commach + cp41 + \ + spacech + cp42 + \ + spacech + cp43 + pp[i:i] = cindexch + [ + chunk(GROUP, ch.where, ingroupch)] + i, length = i+2, length+2 + + + + else: + print 'don\'t know what to do with keyword ' + `s(buf, ch.data)` + + + re_atsign = regex.compile('[@{}]') re_newline = regex.compile('\n') def dumpit(buf, wm, pp): - global out - - i, length = 0, len(pp) + global out - addspace = 0 - - while 1: - if len(pp) != length: - raise 'FATAL', 'inconsistent length' - if i == length: - break - ch = pp[i] - i = i + 1 + i, length = 0, len(pp) - if addspace: - dospace = 1 - addspace = 0 - else: - dospace = 0 - - if ch.chtype == chunk_type(CSNAME): - wm('@' + s(buf, ch.data)) - if s(buf, ch.data) == 'node' and \ - pp[i].chtype == chunk_type(PLAIN) and \ - s(buf, pp[i].data) in out.doublenodes: - ##XXX doesnt work yet?? - wm(' ZZZ-' + zfill(`i`, 4)) - if s(buf, ch.data)[0] in string.letters: - addspace = 1 - elif ch.chtype == chunk_type(PLAIN): - if dospace and s(buf, ch.data) not in (' ', '\t'): - wm(' ') - text = s(buf, ch.data) - while 1: - pos = re_atsign.search(text) - if pos < 0: - break - wm(text[:pos] + '@' + text[pos]) - text = text[pos+1:] - wm(text) - elif ch.chtype == chunk_type(GROUP): - wm('{') - dumpit(buf, wm, ch.data) - wm('}') - elif ch.chtype == chunk_type(DENDLINE): - wm('\n\n') - while i != length and pp[i].chtype in \ - (chunk_type(DENDLINE), chunk_type(ENDLINE)): - i = i + 1 - elif ch.chtype == chunk_type(OTHER): - wm(s(buf, ch.data)) - elif ch.chtype == chunk_type(ACTIVE): - wm(s(buf, ch.data)) - elif ch.chtype == chunk_type(ENDLINE): - wm('\n') - elif ch.chtype == chunk_type(CSLINE): - if i >= 2 and pp[i-2].chtype not in \ - (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ - and (pp[i-2].chtype != chunk_type(PLAIN) \ - or s(buf, pp[i-2].data)[-1] != '\n'): - - wm('\n') - wm('@' + s(buf, ch.data)) - if i == length: - raise error, 'CSLINE expected another chunk' - if pp[i].chtype != chunk_type(GROUP): - raise error, 'CSLINE expected GROUP' - if type(pp[i].data) != ListType: - raise error, 'GROUP chould contain []-data' - - wobj = Wobj() - dumpit(buf, wobj.write, pp[i].data) - i = i + 1 - text = wobj.data - del wobj - if text: - wm(' ') - while 1: - pos = re_newline.search(text) - if pos < 0: - break - print 'WARNING: found newline in csline arg' - wm(text[:pos] + ' ') - text = text[pos+1:] - wm(text) - if i >= length or \ - pp[i].chtype not in (chunk_type(CSLINE), \ - chunk_type(ENDLINE), chunk_type(DENDLINE)) \ - and (pp[i].chtype != chunk_type(PLAIN) \ - or s(buf, pp[i].data)[0] != '\n'): - wm('\n') - - elif ch.chtype == chunk_type(COMMENT): + addspace = 0 + + while 1: + if len(pp) != length: + raise 'FATAL', 'inconsistent length' + if i == length: + break + ch = pp[i] + i = i + 1 + + if addspace: + dospace = 1 + addspace = 0 + else: + dospace = 0 + + if ch.chtype == chunk_type(CSNAME): + wm('@' + s(buf, ch.data)) + if s(buf, ch.data) == 'node' and \ + pp[i].chtype == chunk_type(PLAIN) and \ + s(buf, pp[i].data) in out.doublenodes: + ##XXX doesnt work yet?? + wm(' ZZZ-' + zfill(`i`, 4)) + if s(buf, ch.data)[0] in string.letters: + addspace = 1 + elif ch.chtype == chunk_type(PLAIN): + if dospace and s(buf, ch.data) not in (' ', '\t'): + wm(' ') + text = s(buf, ch.data) + while 1: + pos = re_atsign.search(text) + if pos < 0: + break + wm(text[:pos] + '@' + text[pos]) + text = text[pos+1:] + wm(text) + elif ch.chtype == chunk_type(GROUP): + wm('{') + dumpit(buf, wm, ch.data) + wm('}') + elif ch.chtype == chunk_type(DENDLINE): + wm('\n\n') + while i != length and pp[i].chtype in \ + (chunk_type(DENDLINE), chunk_type(ENDLINE)): + i = i + 1 + elif ch.chtype == chunk_type(OTHER): + wm(s(buf, ch.data)) + elif ch.chtype == chunk_type(ACTIVE): + wm(s(buf, ch.data)) + elif ch.chtype == chunk_type(ENDLINE): + wm('\n') + elif ch.chtype == chunk_type(CSLINE): + if i >= 2 and pp[i-2].chtype not in \ + (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ + and (pp[i-2].chtype != chunk_type(PLAIN) + or s(buf, pp[i-2].data)[-1] != '\n'): + + wm('\n') + wm('@' + s(buf, ch.data)) + if i == length: + raise error, 'CSLINE expected another chunk' + if pp[i].chtype != chunk_type(GROUP): + raise error, 'CSLINE expected GROUP' + if type(pp[i].data) != ListType: + raise error, 'GROUP chould contain []-data' + + wobj = Wobj() + dumpit(buf, wobj.write, pp[i].data) + i = i + 1 + text = wobj.data + del wobj + if text: + wm(' ') + while 1: + pos = re_newline.search(text) + if pos < 0: + break + print 'WARNING: found newline in csline arg' + wm(text[:pos] + ' ') + text = text[pos+1:] + wm(text) + if i >= length or \ + pp[i].chtype not in (chunk_type(CSLINE), + chunk_type(ENDLINE), chunk_type(DENDLINE)) \ + and (pp[i].chtype != chunk_type(PLAIN) + or s(buf, pp[i].data)[0] != '\n'): + wm('\n') + + elif ch.chtype == chunk_type(COMMENT): ## print 'COMMENT: previous chunk =', pp[i-2] ## if pp[i-2].chtype == chunk_type(PLAIN): ## print 'PLAINTEXT =', `s(buf, pp[i-2].data)` - if s(buf, ch.data) and \ - regex.match('^[ \t]*$', s(buf, ch.data)) < 0: - if i >= 2 and pp[i-2].chtype not in \ - (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ - and not (pp[i-2].chtype == chunk_type(PLAIN) \ - and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0): - print 'ADDING NEWLINE' - wm('\n') - wm('@c ' + s(buf, ch.data)) - elif ch.chtype == chunk_type(IGNORE): - pass - else: - try: - str = `s(buf, ch.data)` - except TypeError: - str = `ch.data` - if len(str) > 400: - str = str[:400] + '...' - print 'warning:', ch.chtype, 'not handled, data ' + str + if s(buf, ch.data) and \ + regex.match('^[ \t]*$', s(buf, ch.data)) < 0: + if i >= 2 and pp[i-2].chtype not in \ + (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ + and not (pp[i-2].chtype == chunk_type(PLAIN) + and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0): + print 'ADDING NEWLINE' + wm('\n') + wm('@c ' + s(buf, ch.data)) + elif ch.chtype == chunk_type(IGNORE): + pass + else: + try: + str = `s(buf, ch.data)` + except TypeError: + str = `ch.data` + if len(str) > 400: + str = str[:400] + '...' + print 'warning:', ch.chtype, 'not handled, data ' + str def main(): - outfile = None - headerfile = 'texipre.dat' - trailerfile = 'texipost.dat' - - try: - opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:') - except getopt.error: - args = [] - - if not args: - print 'usage: partparse [-o outfile] [-h headerfile]', - print '[-t trailerfile] file ...' - sys.exit(2) - - for opt, arg in opts: - if opt == '-o': outfile = arg - if opt == '-h': headerfile = arg - if opt == '-t': trailerfile = arg - - if not outfile: - root, ext = os.path.splitext(args[0]) - outfile = root + '.texi' - - if outfile in args: - print 'will not overwrite input file', outfile - sys.exit(2) - - outf = open(outfile, 'w') - outf.write(open(headerfile, 'r').read()) - - for file in args: - if len(args) > 1: print '='*20, file, '='*20 - buf = open(file, 'r').read() - w, pp = parseit(buf) - startchange() - changeit(buf, pp) - dumpit(buf, outf.write, pp) - - outf.write(open(trailerfile, 'r').read()) - - outf.close() + outfile = None + headerfile = 'texipre.dat' + trailerfile = 'texipost.dat' + + try: + opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:') + except getopt.error: + args = [] + + if not args: + print 'usage: partparse [-o outfile] [-h headerfile]', + print '[-t trailerfile] file ...' + sys.exit(2) + + for opt, arg in opts: + if opt == '-o': outfile = arg + if opt == '-h': headerfile = arg + if opt == '-t': trailerfile = arg + + if not outfile: + root, ext = os.path.splitext(args[0]) + outfile = root + '.texi' + + if outfile in args: + print 'will not overwrite input file', outfile + sys.exit(2) + + outf = open(outfile, 'w') + outf.write(open(headerfile, 'r').read()) + + for file in args: + if len(args) > 1: print '='*20, file, '='*20 + buf = open(file, 'r').read() + w, pp = parseit(buf) + startchange() + changeit(buf, pp) + dumpit(buf, outf.write, pp) + + outf.write(open(trailerfile, 'r').read()) + + outf.close() if __name__ == "__main__": main() diff --git a/Doc/tools/partparse.py b/Doc/tools/partparse.py index db53a01..2f072bb 100644 --- a/Doc/tools/partparse.py +++ b/Doc/tools/partparse.py @@ -27,61 +27,61 @@ MODE_MATH = 4 MODE_DMATH = 5 MODE_GOBBLEWHITE = 6 -the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \ - MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE +the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, + MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE) # Show the neighbourhood of the scanned buffer def epsilon(buf, where): - wmt, wpt = where - 10, where + 10 - if wmt < 0: - wmt = 0 - if wpt > len(buf): - wpt = len(buf) - return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.' + wmt, wpt = where - 10, where + 10 + if wmt < 0: + wmt = 0 + if wpt > len(buf): + wpt = len(buf) + return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.' # Should return the line number. never worked def lin(): - global lineno - return ' Line ' + `lineno` + '.' + global lineno + return ' Line ' + `lineno` + '.' # Displays the recursion level. def lv(lvl): - return ' Level ' + `lvl` + '.' + return ' Level ' + `lvl` + '.' # Combine the three previous functions. Used often. def lle(lvl, buf, where): - return lv(lvl) + lin() + epsilon(buf, where) - - + return lv(lvl) + lin() + epsilon(buf, where) + + # This class is only needed for _symbolic_ representation of the parse mode. class Mode: - def __init__(self, arg): - if arg not in the_modes: - raise ValueError, 'mode not in the_modes' - self.mode = arg - - def __cmp__(self, other): - if type(self) != type(other): - other = mode(other) - return cmp(self.mode, other.mode) - - def __repr__(self): - if self.mode == MODE_REGULAR: - return 'MODE_REGULAR' - elif self.mode == MODE_VERBATIM: - return 'MODE_VERBATIM' - elif self.mode == MODE_CS_SCAN: - return 'MODE_CS_SCAN' - elif self.mode == MODE_COMMENT: - return 'MODE_COMMENT' - elif self.mode == MODE_MATH: - return 'MODE_MATH' - elif self.mode == MODE_DMATH: - return 'MODE_DMATH' - elif self.mode == MODE_GOBBLEWHITE: - return 'MODE_GOBBLEWHITE' - else: - raise ValueError, 'mode not in the_modes' + def __init__(self, arg): + if arg not in the_modes: + raise ValueError, 'mode not in the_modes' + self.mode = arg + + def __cmp__(self, other): + if type(self) != type(other): + other = mode(other) + return cmp(self.mode, other.mode) + + def __repr__(self): + if self.mode == MODE_REGULAR: + return 'MODE_REGULAR' + elif self.mode == MODE_VERBATIM: + return 'MODE_VERBATIM' + elif self.mode == MODE_CS_SCAN: + return 'MODE_CS_SCAN' + elif self.mode == MODE_COMMENT: + return 'MODE_COMMENT' + elif self.mode == MODE_MATH: + return 'MODE_MATH' + elif self.mode == MODE_DMATH: + return 'MODE_DMATH' + elif self.mode == MODE_GOBBLEWHITE: + return 'MODE_GOBBLEWHITE' + else: + raise ValueError, 'mode not in the_modes' # just a wrapper around a class initialisation mode = Mode @@ -106,88 +106,88 @@ GOBBLEDWHITE = 8 # Gobbled LWSP, after CSNAME ENDLINE = 9 # END-OF-LINE, data = '\n' DENDLINE = 10 # DOUBLE EOL, data='\n', indicates \par ENV = 11 # LaTeX-environment - # data =(envname,[ch,ch,ch,.]) + # data =(envname,[ch,ch,ch,.]) CSLINE = 12 # for texi: next chunk will be one group - # of args. Will be set all on 1 line + # of args. Will be set all on 1 line IGNORE = 13 # IGNORE this data ENDENV = 14 # TEMP END OF GROUP INDICATOR IF = 15 # IF-directive - # data = (flag,negate,[ch, ch, ch,...]) -the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \ - GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF + # data = (flag,negate,[ch, ch, ch,...]) +the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, + GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF) # class, just to display symbolic name class ChunkType: - def __init__(self, chunk_type): - if chunk_type not in the_types: - raise ValueError, 'chunk_type not in the_types' - self.chunk_type = chunk_type - - def __cmp__(self, other): - if type(self) != type(other): - other = chunk_type(other) - return cmp(self.chunk_type, other.chunk_type) - - def __repr__(self): - if self.chunk_type == PLAIN: - return 'PLAIN' - elif self.chunk_type == GROUP: - return 'GROUP' - elif self.chunk_type == CSNAME: - return 'CSNAME' - elif self.chunk_type == COMMENT: - return 'COMMENT' - elif self.chunk_type == DMATH: - return 'DMATH' - elif self.chunk_type == MATH: - return 'MATH' - elif self.chunk_type == OTHER: - return 'OTHER' - elif self.chunk_type == ACTIVE: - return 'ACTIVE' - elif self.chunk_type == GOBBLEDWHITE: - return 'GOBBLEDWHITE' - elif self.chunk_type == DENDLINE: - return 'DENDLINE' - elif self.chunk_type == ENDLINE: - return 'ENDLINE' - elif self.chunk_type == ENV: - return 'ENV' - elif self.chunk_type == CSLINE: - return 'CSLINE' - elif self.chunk_type == IGNORE: - return 'IGNORE' - elif self.chunk_type == ENDENV: - return 'ENDENV' - elif self.chunk_type == IF: - return 'IF' - else: - raise ValueError, 'chunk_type not in the_types' + def __init__(self, chunk_type): + if chunk_type not in the_types: + raise ValueError, 'chunk_type not in the_types' + self.chunk_type = chunk_type + + def __cmp__(self, other): + if type(self) != type(other): + other = chunk_type(other) + return cmp(self.chunk_type, other.chunk_type) + + def __repr__(self): + if self.chunk_type == PLAIN: + return 'PLAIN' + elif self.chunk_type == GROUP: + return 'GROUP' + elif self.chunk_type == CSNAME: + return 'CSNAME' + elif self.chunk_type == COMMENT: + return 'COMMENT' + elif self.chunk_type == DMATH: + return 'DMATH' + elif self.chunk_type == MATH: + return 'MATH' + elif self.chunk_type == OTHER: + return 'OTHER' + elif self.chunk_type == ACTIVE: + return 'ACTIVE' + elif self.chunk_type == GOBBLEDWHITE: + return 'GOBBLEDWHITE' + elif self.chunk_type == DENDLINE: + return 'DENDLINE' + elif self.chunk_type == ENDLINE: + return 'ENDLINE' + elif self.chunk_type == ENV: + return 'ENV' + elif self.chunk_type == CSLINE: + return 'CSLINE' + elif self.chunk_type == IGNORE: + return 'IGNORE' + elif self.chunk_type == ENDENV: + return 'ENDENV' + elif self.chunk_type == IF: + return 'IF' + else: + raise ValueError, 'chunk_type not in the_types' # ...and the wrapper _all_chunk_types = {} for t in the_types: - _all_chunk_types[t] = ChunkType(t) + _all_chunk_types[t] = ChunkType(t) def chunk_type(t): - return _all_chunk_types[t] + return _all_chunk_types[t] # store a type object of the ChunkType-class-instance... chunk_type_type = type(chunk_type(0)) # this class contains a part of the parsed buffer class Chunk: - def __init__(self, chtype, where, data): - if type(chtype) != chunk_type_type: - chtype = chunk_type(chtype) - self.chtype = chtype - if type(where) != IntType: - raise TypeError, '\'where\' is not a number' - self.where = where - self.data = data - - def __repr__(self): - return 'chunk' + `self.chtype, self.where, self.data` + def __init__(self, chtype, where, data): + if type(chtype) != chunk_type_type: + chtype = chunk_type(chtype) + self.chtype = chtype + if type(where) != IntType: + raise TypeError, "'where' is not a number" + self.where = where + self.data = data + + def __repr__(self): + return 'chunk' + `self.chtype, self.where, self.data` # and the wrapper chunk = Chunk @@ -216,40 +216,40 @@ CC_COMMENT = 14 CC_INVALID = 15 # and the names -cc_names = [\ - 'CC_ESCAPE', \ - 'CC_LBRACE', \ - 'CC_RBRACE', \ - 'CC_MATHSHIFT', \ - 'CC_ALIGNMENT', \ - 'CC_ENDLINE', \ - 'CC_PARAMETER', \ - 'CC_SUPERSCRIPT', \ - 'CC_SUBSCRIPT', \ - 'CC_IGNORE', \ - 'CC_WHITE', \ - 'CC_LETTER', \ - 'CC_OTHER', \ - 'CC_ACTIVE', \ - 'CC_COMMENT', \ - 'CC_INVALID', \ +cc_names = [ + 'CC_ESCAPE', + 'CC_LBRACE', + 'CC_RBRACE', + 'CC_MATHSHIFT', + 'CC_ALIGNMENT', + 'CC_ENDLINE', + 'CC_PARAMETER', + 'CC_SUPERSCRIPT', + 'CC_SUBSCRIPT', + 'CC_IGNORE', + 'CC_WHITE', + 'CC_LETTER', + 'CC_OTHER', + 'CC_ACTIVE', + 'CC_COMMENT', + 'CC_INVALID', ] # Show a list of catcode-name-symbols def pcl(codelist): - result = '' - for i in codelist: - result = result + cc_names[i] + ', ' - return '[' + result[:-2] + ']' + result = '' + for i in codelist: + result = result + cc_names[i] + ', ' + return '[' + result[:-2] + ']' # the name of the catcode (ACTIVE, OTHER, etc.) def pc(code): - return cc_names[code] - + return cc_names[code] + # Which catcodes make the parser stop parsing regular plaintext -regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \ - CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \ +regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, + CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE] # same for scanning a control sequence name @@ -269,37 +269,37 @@ comment_stopcodes = [CC_ENDLINE] # gather all characters together, specified by a list of catcodes def code2string(cc, codelist): - ##print 'code2string: codelist = ' + pcl(codelist), - result = '' - for category in codelist: - if cc[category]: - result = result + cc[category] - ##print 'result = ' + `result` - return result + ##print 'code2string: codelist = ' + pcl(codelist), + result = '' + for category in codelist: + if cc[category]: + result = result + cc[category] + ##print 'result = ' + `result` + return result # automatically generate all characters of catcode other, being the # complement set in the ASCII range (128 characters) def make_other_codes(cc): - otherchars = range(256) # could be made 256, no problem - for category in all_but_other_codes: - if cc[category]: - for c in cc[category]: - otherchars[ord(c)] = None - result = '' - for i in otherchars: - if i != None: - result = result + chr(i) - return result + otherchars = range(256) # could be made 256, no problem + for category in all_but_other_codes: + if cc[category]: + for c in cc[category]: + otherchars[ord(c)] = None + result = '' + for i in otherchars: + if i != None: + result = result + chr(i) + return result # catcode dump (which characters have which catcodes). def dump_cc(name, cc): - ##print '\t' + name - ##print '=' * (8+len(name)) - if len(cc) != 16: - raise TypeError, 'cc not good cat class' + ##print '\t' + name + ##print '=' * (8+len(name)) + if len(cc) != 16: + raise TypeError, 'cc not good cat class' ## for i in range(16): ## print pc(i) + '\t' + `cc[i]` - + # In the beginning,.... epoch_cc = [None] * 16 @@ -316,7 +316,7 @@ initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F' #initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway ##dump_cc('initex_cc', initex_cc) - + # LPLAIN: LaTeX catcode setting (see lplain.tex) lplain_cc = initex_cc[:] lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}' @@ -330,7 +330,7 @@ lplain_cc[CC_ACTIVE] = '~\x0C' # '~' and C-l lplain_cc[CC_OTHER] = make_other_codes(lplain_cc) ##dump_cc('lplain_cc', lplain_cc) - + # Guido's LaTeX environment catcoded '_' as ``other'' # my own purpose catlist my_cc = lplain_cc[:] @@ -344,27 +344,27 @@ dump_cc('my_cc', my_cc) re_meaning = '\\[]^$' def un_re(str): - result = '' - for i in str: - if i in re_meaning: - result = result + '\\' - result = result + i - return result - + result = '' + for i in str: + if i in re_meaning: + result = result + '\\' + result = result + i + return result + # NOTE the negate ('^') operator in *some* of the regexps below def make_rc_regular(cc): - # problems here if '[]' are included!! - return regex.compile('[' + code2string(cc, regular_stopcodes) + ']') + # problems here if '[]' are included!! + return regex.compile('[' + code2string(cc, regular_stopcodes) + ']') def make_rc_cs_scan(cc): - return regex.compile('[^' + code2string(cc, csname_scancodes) + ']') + return regex.compile('[^' + code2string(cc, csname_scancodes) + ']') def make_rc_comment(cc): - return regex.compile('[' + code2string(cc, comment_stopcodes) + ']') + return regex.compile('[' + code2string(cc, comment_stopcodes) + ']') def make_rc_endwhite(cc): - return regex.compile('[^' + code2string(cc, white_scancodes) + ']') - + return regex.compile('[^' + code2string(cc, white_scancodes) + ']') + # regular: normal mode: @@ -386,213 +386,205 @@ rc_endwhite = make_rc_endwhite(my_cc) # This has been done in order to better check for environment-mismatches def parseit(buf, *rest): - global lineno - - if len(rest) == 3: - parsemode, start, lvl = rest - elif len(rest) == 2: - parsemode, start, lvl = rest + (0, ) - elif len(rest) == 1: - parsemode, start, lvl = rest + (0, 0) - elif len(rest) == 0: - parsemode, start, lvl = mode(MODE_REGULAR), 0, 0 - else: - raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])' - result = [] - end = len(buf) - if lvl == 0 and parsemode == mode(MODE_REGULAR): - lineno = 1 - lvl = lvl + 1 - - ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')' - - # - # some of the more regular modes... - # - - if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)): - cstate = [] - newpos = start - curpmode = parsemode - while 1: - where = newpos - #print '\tnew round: ' + epsilon(buf, where) - if where == end: - if lvl > 1 or curpmode != mode(MODE_REGULAR): - # not the way we started... - raise EOFError, 'premature end of file.' + lle(lvl, buf, where) - # the real ending of lvl-1 parse - return end, result - - pos = rc_regular.search(buf, where) - - if pos < 0: - pos = end - - if pos != where: - newpos, c = pos, chunk(PLAIN, where, (where, pos)) - result.append(c) - continue - - - # - # ok, pos == where and pos != end - # - foundchar = buf[where] - if foundchar in my_cc[CC_LBRACE]: - # recursive subgroup parse... - newpos, data = parseit(buf, curpmode, where+1, lvl) - result.append(chunk(GROUP, where, data)) - - elif foundchar in my_cc[CC_RBRACE]: - if lvl <= 1: - raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where) - if lvl == 1 and mode != mode(MODE_REGULAR): - raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)' - return where + 1, result - - elif foundchar in my_cc[CC_ESCAPE]: - # - # call the routine that actually deals with - # this problem. If do_ret is None, than - # return the value of do_ret - # - # Note that handle_cs might call this routine - # recursively again... - # - do_ret, newpos = handlecs(buf, where, \ - curpmode, lvl, result, end) - if do_ret != None: - return do_ret - - elif foundchar in my_cc[CC_COMMENT]: - newpos, data = parseit(buf, \ - mode(MODE_COMMENT), where+1, lvl) - result.append(chunk(COMMENT, where, data)) - - elif foundchar in my_cc[CC_MATHSHIFT]: - # note that recursive calls to math-mode - # scanning are called with recursion-level 0 - # again, in order to check for bad mathend - # - if where + 1 != end and \ - buf[where + 1] in \ - my_cc[CC_MATHSHIFT]: - # - # double mathshift, e.g. '$$' - # - if curpmode == mode(MODE_REGULAR): - newpos, data = parseit(buf, \ - mode(MODE_DMATH), \ - where+2, 0) - result.append(chunk(DMATH, \ - where, data)) - elif curpmode == mode(MODE_MATH): - raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) - elif lvl != 1: - raise error, 'bad mathend.' + \ - lle(lvl, buf, where) - else: - return where + 2, result - else: - # - # single math shift, e.g. '$' - # - if curpmode == mode(MODE_REGULAR): - newpos, data = parseit(buf, \ - mode(MODE_MATH), \ - where+1, 0) - result.append(chunk(MATH, \ - where, data)) - elif curpmode == mode(MODE_DMATH): - raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) - elif lvl != 1: - raise error, 'bad mathend.' + \ - lv(lvl, buf, where) - else: - return where + 1, result - - elif foundchar in my_cc[CC_IGNORE]: - print 'warning: ignored char', `foundchar` - newpos = where + 1 - - elif foundchar in my_cc[CC_ACTIVE]: - result.append(chunk(ACTIVE, where, foundchar)) - newpos = where + 1 - - elif foundchar in my_cc[CC_INVALID]: - raise error, 'invalid char ' + `foundchar` - newpos = where + 1 - - elif foundchar in my_cc[CC_ENDLINE]: - # - # after an end of line, eat the rest of - # whitespace on the beginning of the next line - # this is what LaTeX more or less does - # - # also, try to indicate double newlines (\par) - # - lineno = lineno + 1 - savedwhere = where - newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl) - if newpos != end and buf[newpos] in \ - my_cc[CC_ENDLINE]: - result.append(chunk(DENDLINE, \ - savedwhere, foundchar)) - else: - result.append(chunk(ENDLINE, \ - savedwhere, foundchar)) - else: - result.append(chunk(OTHER, where, foundchar)) - newpos = where + 1 - - elif parsemode == mode(MODE_CS_SCAN): + global lineno + + if len(rest) == 3: + parsemode, start, lvl = rest + elif len(rest) == 2: + parsemode, start, lvl = rest + (0, ) + elif len(rest) == 1: + parsemode, start, lvl = rest + (0, 0) + elif len(rest) == 0: + parsemode, start, lvl = mode(MODE_REGULAR), 0, 0 + else: + raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])' + result = [] + end = len(buf) + if lvl == 0 and parsemode == mode(MODE_REGULAR): + lineno = 1 + lvl = lvl + 1 + + ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')' + + # + # some of the more regular modes... + # + + if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)): + cstate = [] + newpos = start + curpmode = parsemode + while 1: + where = newpos + #print '\tnew round: ' + epsilon(buf, where) + if where == end: + if lvl > 1 or curpmode != mode(MODE_REGULAR): + # not the way we started... + raise EOFError, 'premature end of file.' + lle(lvl, buf, where) + # the real ending of lvl-1 parse + return end, result + + pos = rc_regular.search(buf, where) + + if pos < 0: + pos = end + + if pos != where: + newpos, c = pos, chunk(PLAIN, where, (where, pos)) + result.append(c) + continue + + + # + # ok, pos == where and pos != end + # + foundchar = buf[where] + if foundchar in my_cc[CC_LBRACE]: + # recursive subgroup parse... + newpos, data = parseit(buf, curpmode, where+1, lvl) + result.append(chunk(GROUP, where, data)) + + elif foundchar in my_cc[CC_RBRACE]: + if lvl <= 1: + raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where) + if lvl == 1 and mode != mode(MODE_REGULAR): + raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)' + return where + 1, result + + elif foundchar in my_cc[CC_ESCAPE]: # - # scan for a control sequence token. `\ape', `\nut' or `\%' + # call the routine that actually deals with + # this problem. If do_ret is None, than + # return the value of do_ret # - if start == end: - raise EOFError, 'can\'t find end of csname' - pos = rc_cs_scan.search(buf, start) - if pos < 0: - pos = end - if pos == start: - # first non-letter right where we started the search - # ---> the control sequence name consists of one single - # character. Also: don't eat white space... - if buf[pos] in my_cc[CC_ENDLINE]: - lineno = lineno + 1 - pos = pos + 1 - return pos, (start, pos) + # Note that handle_cs might call this routine + # recursively again... + # + do_ret, newpos = handlecs(buf, where, + curpmode, lvl, result, end) + if do_ret != None: + return do_ret + + elif foundchar in my_cc[CC_COMMENT]: + newpos, data = parseit(buf, + mode(MODE_COMMENT), where+1, lvl) + result.append(chunk(COMMENT, where, data)) + + elif foundchar in my_cc[CC_MATHSHIFT]: + # note that recursive calls to math-mode + # scanning are called with recursion-level 0 + # again, in order to check for bad mathend + # + if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]: + # + # double mathshift, e.g. '$$' + # + if curpmode == mode(MODE_REGULAR): + newpos, data = parseit(buf, + mode(MODE_DMATH), + where+2, 0) + result.append(chunk(DMATH, + where, data)) + elif curpmode == mode(MODE_MATH): + raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) + elif lvl != 1: + raise error, 'bad mathend.' + lle(lvl, buf, where) + else: + return where + 2, result else: - spos = pos - if buf[pos] == '\n': - lineno = lineno + 1 - spos = pos + 1 - pos2, dummy = parseit(buf, \ - mode(MODE_GOBBLEWHITE), spos, lvl) - return pos2, (start, pos) - - elif parsemode == mode(MODE_GOBBLEWHITE): - if start == end: - return start, '' - pos = rc_endwhite.search(buf, start) - if pos < 0: - pos = start - return pos, (start, pos) + # + # single math shift, e.g. '$' + # + if curpmode == mode(MODE_REGULAR): + newpos, data = parseit(buf, + mode(MODE_MATH), + where+1, 0) + result.append(chunk(MATH, + where, data)) + elif curpmode == mode(MODE_DMATH): + raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) + elif lvl != 1: + raise error, 'bad mathend.' + lv(lvl, buf, where) + else: + return where + 1, result + + elif foundchar in my_cc[CC_IGNORE]: + print 'warning: ignored char', `foundchar` + newpos = where + 1 + + elif foundchar in my_cc[CC_ACTIVE]: + result.append(chunk(ACTIVE, where, foundchar)) + newpos = where + 1 + + elif foundchar in my_cc[CC_INVALID]: + raise error, 'invalid char ' + `foundchar` + newpos = where + 1 + + elif foundchar in my_cc[CC_ENDLINE]: + # + # after an end of line, eat the rest of + # whitespace on the beginning of the next line + # this is what LaTeX more or less does + # + # also, try to indicate double newlines (\par) + # + lineno = lineno + 1 + savedwhere = where + newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl) + if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]: + result.append(chunk(DENDLINE, savedwhere, foundchar)) + else: + result.append(chunk(ENDLINE, savedwhere, foundchar)) + else: + result.append(chunk(OTHER, where, foundchar)) + newpos = where + 1 - elif parsemode == mode(MODE_COMMENT): - pos = rc_comment.search(buf, start) + elif parsemode == mode(MODE_CS_SCAN): + # + # scan for a control sequence token. `\ape', `\nut' or `\%' + # + if start == end: + raise EOFError, 'can\'t find end of csname' + pos = rc_cs_scan.search(buf, start) + if pos < 0: + pos = end + if pos == start: + # first non-letter right where we started the search + # ---> the control sequence name consists of one single + # character. Also: don't eat white space... + if buf[pos] in my_cc[CC_ENDLINE]: lineno = lineno + 1 - if pos < 0: - print 'no newline perhaps?' - raise EOFError, 'can\'t find end of comment' - pos = pos + 1 - pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl) - return pos2, (start, pos) + pos = pos + 1 + return pos, (start, pos) + else: + spos = pos + if buf[pos] == '\n': + lineno = lineno + 1 + spos = pos + 1 + pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), spos, lvl) + return pos2, (start, pos) + elif parsemode == mode(MODE_GOBBLEWHITE): + if start == end: + return start, '' + pos = rc_endwhite.search(buf, start) + if pos < 0: + pos = start + return pos, (start, pos) + + elif parsemode == mode(MODE_COMMENT): + pos = rc_comment.search(buf, start) + lineno = lineno + 1 + if pos < 0: + print 'no newline perhaps?' + raise EOFError, 'can\'t find end of comment' + pos = pos + 1 + pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl) + return pos2, (start, pos) - else: - raise error, 'Unknown mode (' + `parsemode` + ')' + + else: + raise error, 'Unknown mode (' + `parsemode` + ')' #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl) @@ -613,182 +605,182 @@ re_endverb = regex.compile(un_re(endverbstr)) # return with the data in return_data # def handlecs(buf, where, curpmode, lvl, result, end): - global lineno - - # get the control sequence name... - newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl) - saveddata = data - - if s(buf, data) in ('begin', 'end'): - # skip the expected '{' and get the LaTeX-envname '}' - newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl) - if len(data) != 1: - raise error, 'expected 1 chunk of data.' + \ - lle(lvl, buf, where) - - # yucky, we've got an environment - envname = s(buf, data[0].data) - ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl) - if s(buf, saveddata) == 'begin' and envname == 'verbatim': - # verbatim deserves special treatment - pos = re_endverb.search(buf, newpos) - if pos < 0: - raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where) - result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))]))) - newpos = pos + len(endverbstr) - - elif s(buf, saveddata) == 'begin': - # start parsing recursively... If that parse returns - # from an '\end{...}', then should the last item of - # the returned data be a string containing the ended - # environment - newpos, data = parseit(buf, curpmode, newpos, lvl) - if not data or type(data[-1]) is not StringType: - raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos) - retenv = data[-1] - del data[-1] - if retenv != envname: - #[`retenv`, `envname`] - raise error, 'environments do not match.' + \ - lle(lvl, buf, where) + \ - epsilon(buf, newpos) - result.append(chunk(ENV, where, (retenv, data))) - else: - # 'end'... append the environment name, as just - # pointed out, and order parsit to return... - result.append(envname) - ##print 'POINT of return: ' + epsilon(buf, newpos) - # the tuple will be returned by parseit - return (newpos, result), newpos - - # end of \begin ... \end handling - - elif s(buf, data)[0:2] == 'if': - # another scary monster: the 'if' directive - flag = s(buf, data)[2:] - - # recursively call parseit, just like environment above.. - # the last item of data should contain the if-termination - # e.g., 'else' of 'fi' - newpos, data = parseit(buf, curpmode, newpos, lvl) - if not data or data[-1] not in ('else', 'fi'): - raise error, 'wrong if... termination' + \ - lle(lvl, buf, where) + epsilon(buf, newpos) - - ifterm = data[-1] - del data[-1] - # 0 means dont_negate flag - result.append(chunk(IF, where, (flag, 0, data))) - if ifterm == 'else': - # do the whole thing again, there is only one way - # to end this one, by 'fi' - newpos, data = parseit(buf, curpmode, newpos, lvl) - if not data or data[-1] not in ('fi', ): - raise error, 'wrong if...else... termination' \ - + lle(lvl, buf, where) \ - + epsilon(buf, newpos) - - ifterm = data[-1] - del data[-1] - result.append(chunk(IF, where, (flag, 1, data))) - #done implicitely: return None, newpos - - elif s(buf, data) in ('else', 'fi'): - result.append(s(buf, data)) - # order calling party to return tuple - return (newpos, result), newpos - - # end of \if, \else, ... \fi handling - - elif s(buf, saveddata) == 'verb': - x2 = saveddata[1] - result.append(chunk(CSNAME, where, data)) - if x2 == end: - raise error, 'premature end of command.' + lle(lvl, buf, where) - delimchar = buf[x2] - ##print 'VERB: delimchar ' + `delimchar` - pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1) - if pos < 0: - raise error, 'end of \'verb\' argument (' + \ - `delimchar` + ') not found.' + \ - lle(lvl, buf, where) - result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))])) - newpos = pos + 1 + global lineno + + # get the control sequence name... + newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl) + saveddata = data + + if s(buf, data) in ('begin', 'end'): + # skip the expected '{' and get the LaTeX-envname '}' + newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl) + if len(data) != 1: + raise error, 'expected 1 chunk of data.' + \ + lle(lvl, buf, where) + + # yucky, we've got an environment + envname = s(buf, data[0].data) + ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl) + if s(buf, saveddata) == 'begin' and envname == 'verbatim': + # verbatim deserves special treatment + pos = re_endverb.search(buf, newpos) + if pos < 0: + raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where) + result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))]))) + newpos = pos + len(endverbstr) + + elif s(buf, saveddata) == 'begin': + # start parsing recursively... If that parse returns + # from an '\end{...}', then should the last item of + # the returned data be a string containing the ended + # environment + newpos, data = parseit(buf, curpmode, newpos, lvl) + if not data or type(data[-1]) is not StringType: + raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos) + retenv = data[-1] + del data[-1] + if retenv != envname: + #[`retenv`, `envname`] + raise error, 'environments do not match.' + \ + lle(lvl, buf, where) + \ + epsilon(buf, newpos) + result.append(chunk(ENV, where, (retenv, data))) else: - result.append(chunk(CSNAME, where, data)) - return None, newpos + # 'end'... append the environment name, as just + # pointed out, and order parsit to return... + result.append(envname) + ##print 'POINT of return: ' + epsilon(buf, newpos) + # the tuple will be returned by parseit + return (newpos, result), newpos + + # end of \begin ... \end handling + + elif s(buf, data)[0:2] == 'if': + # another scary monster: the 'if' directive + flag = s(buf, data)[2:] + + # recursively call parseit, just like environment above.. + # the last item of data should contain the if-termination + # e.g., 'else' of 'fi' + newpos, data = parseit(buf, curpmode, newpos, lvl) + if not data or data[-1] not in ('else', 'fi'): + raise error, 'wrong if... termination' + \ + lle(lvl, buf, where) + epsilon(buf, newpos) + + ifterm = data[-1] + del data[-1] + # 0 means dont_negate flag + result.append(chunk(IF, where, (flag, 0, data))) + if ifterm == 'else': + # do the whole thing again, there is only one way + # to end this one, by 'fi' + newpos, data = parseit(buf, curpmode, newpos, lvl) + if not data or data[-1] not in ('fi', ): + raise error, 'wrong if...else... termination' \ + + lle(lvl, buf, where) \ + + epsilon(buf, newpos) + + ifterm = data[-1] + del data[-1] + result.append(chunk(IF, where, (flag, 1, data))) + #done implicitely: return None, newpos + + elif s(buf, data) in ('else', 'fi'): + result.append(s(buf, data)) + # order calling party to return tuple + return (newpos, result), newpos + + # end of \if, \else, ... \fi handling + + elif s(buf, saveddata) == 'verb': + x2 = saveddata[1] + result.append(chunk(CSNAME, where, data)) + if x2 == end: + raise error, 'premature end of command.' + lle(lvl, buf, where) + delimchar = buf[x2] + ##print 'VERB: delimchar ' + `delimchar` + pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1) + if pos < 0: + raise error, 'end of \'verb\' argument (' + \ + `delimchar` + ') not found.' + \ + lle(lvl, buf, where) + result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))])) + newpos = pos + 1 + else: + result.append(chunk(CSNAME, where, data)) + return None, newpos # this is just a function to get the string value if the possible data-tuple def s(buf, data): - if type(data) is StringType: - return data - if len(data) != 2 or not (type(data[0]) is type(data[1]) is IntType): - raise TypeError, 'expected tuple of 2 integers' - x1, x2 = data - return buf[x1:x2] + if type(data) is StringType: + return data + if len(data) != 2 or not (type(data[0]) is type(data[1]) is IntType): + raise TypeError, 'expected tuple of 2 integers' + x1, x2 = data + return buf[x1:x2] ##length, data1, i = getnextarg(length, buf, pp, i + 1) # make a deep-copy of some chunks def crcopy(r): - return map(chunkcopy, r) + return map(chunkcopy, r) # copy a chunk, would better be a method of class Chunk... def chunkcopy(ch): - if ch.chtype == chunk_type(GROUP): - return chunk(GROUP, ch.where, map(chunkcopy, ch.data)) - else: - return chunk(ch.chtype, ch.where, ch.data) + if ch.chtype == chunk_type(GROUP): + return chunk(GROUP, ch.where, map(chunkcopy, ch.data)) + else: + return chunk(ch.chtype, ch.where, ch.data) # get next argument for TeX-macro, flatten a group (insert between) # or return Command Sequence token, or give back one character def getnextarg(length, buf, pp, item): - ##wobj = Wobj() - ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) - ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' - - while item < length and pp[item].chtype == chunk_type(ENDLINE): - del pp[item] - length = length - 1 - if item >= length: - raise error, 'no next arg.' + epsilon(buf, pp[-1].where) - if pp[item].chtype == chunk_type(GROUP): - newpp = pp[item].data - del pp[item] - length = length - 1 - changeit(buf, newpp) - length = length + len(newpp) - pp[item:item] = newpp - item = item + len(newpp) - if len(newpp) < 10: - wobj = Wobj() - dumpit(buf, wobj.write, newpp) - ##print 'GETNEXTARG: inserted ' + `wobj.data` - return length, item - elif pp[item].chtype == chunk_type(PLAIN): - #grab one char - print 'WARNING: grabbing one char' - if len(s(buf, pp[item].data)) > 1: - pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1])) - item, length = item+1, length+1 - pp[item].data = s(buf, pp[item].data)[1:] - else: - item = item+1 - return length, item + ##wobj = Wobj() + ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) + ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + + while item < length and pp[item].chtype == chunk_type(ENDLINE): + del pp[item] + length = length - 1 + if item >= length: + raise error, 'no next arg.' + epsilon(buf, pp[-1].where) + if pp[item].chtype == chunk_type(GROUP): + newpp = pp[item].data + del pp[item] + length = length - 1 + changeit(buf, newpp) + length = length + len(newpp) + pp[item:item] = newpp + item = item + len(newpp) + if len(newpp) < 10: + wobj = Wobj() + dumpit(buf, wobj.write, newpp) + ##print 'GETNEXTARG: inserted ' + `wobj.data` + return length, item + elif pp[item].chtype == chunk_type(PLAIN): + #grab one char + print 'WARNING: grabbing one char' + if len(s(buf, pp[item].data)) > 1: + pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1])) + item, length = item+1, length+1 + pp[item].data = s(buf, pp[item].data)[1:] else: - ch = pp[item] - try: - str = `s(buf, ch.data)` - except TypeError: - str = `ch.data` - if len(str) > 400: - str = str[:400] + '...' - print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str - return length, item + item = item+1 + return length, item + else: + ch = pp[item] + try: + str = `s(buf, ch.data)` + except TypeError: + str = `ch.data` + if len(str) > 400: + str = str[:400] + '...' + print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str + return length, item # this one is needed to find the end of LaTeX's optional argument, like @@ -798,52 +790,52 @@ re_endopt = regex.compile(']') # get a LaTeX-optional argument, you know, the square braces '[' and ']' def getoptarg(length, buf, pp, item): - wobj = Wobj() - dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) - ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + wobj = Wobj() + dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) + ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + + if item >= length or \ + pp[item].chtype != chunk_type(PLAIN) or \ + s(buf, pp[item].data)[0] != '[': + return length, item + + pp[item].data = s(buf, pp[item].data)[1:] + if len(pp[item].data) == 0: + del pp[item] + length = length-1 + + while 1: + if item == length: + raise error, 'No end of optional arg found' + if pp[item].chtype == chunk_type(PLAIN): + text = s(buf, pp[item].data) + pos = re_endopt.search(text) + if pos >= 0: + pp[item].data = text[:pos] + if pos == 0: + del pp[item] + length = length-1 + else: + item=item+1 + text = text[pos+1:] - if item >= length or \ - pp[item].chtype != chunk_type(PLAIN) or \ - s(buf, pp[item].data)[0] != '[': - return length, item + while text and text[0] in ' \t': + text = text[1:] - pp[item].data = s(buf, pp[item].data)[1:] - if len(pp[item].data) == 0: - del pp[item] - length = length-1 + if text: + pp.insert(item, chunk(PLAIN, 0, text)) + length = length + 1 + return length, item - while 1: - if item == length: - raise error, 'No end of optional arg found' - if pp[item].chtype == chunk_type(PLAIN): - text = s(buf, pp[item].data) - pos = re_endopt.search(text) - if pos >= 0: - pp[item].data = text[:pos] - if pos == 0: - del pp[item] - length = length-1 - else: - item=item+1 - text = text[pos+1:] - - while text and text[0] in ' \t': - text = text[1:] - - if text: - pp.insert(item, chunk(PLAIN, 0, text)) - length = length + 1 - return length, item - - item = item+1 + item = item+1 # Wobj just add write-requests to the ``data'' attribute class Wobj: - data = '' + data = '' - def write(self, data): - self.data = self.data + data + def write(self, data): + self.data = self.data + data # ignore these commands ignoredcommands = ('bcode', 'ecode') @@ -855,7 +847,7 @@ themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves inargsselves = (',', '[', ']', '(', ')') # this is how *I* would show the difference between emph and strong # code 1 means: fold to uppercase -markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \ +markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), 'strong': ('*', '*')} # recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT} @@ -868,95 +860,95 @@ for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', # try to remove macros and return flat text def flattext(buf, pp): - pp = crcopy(pp) - ##print '---> FLATTEXT ' + `pp` - wobj = Wobj() - - i, length = 0, len(pp) - while 1: - if len(pp) != length: - raise 'FATAL', 'inconsistent length' - if i >= length: - break - ch = pp[i] - i = i+1 - if ch.chtype == chunk_type(PLAIN): - pass - elif ch.chtype == chunk_type(CSNAME): - if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves: - ch.chtype = chunk_type(PLAIN) - elif s(buf, ch.data) == 'e': - ch.chtype = chunk_type(PLAIN) - ch.data = '\\' - elif len(s(buf, ch.data)) == 1 \ - and s(buf, ch.data) in onlylatexspecial: - ch.chtype = chunk_type(PLAIN) - # if it is followed by an empty group, - # remove that group, it was needed for - # a true space - if i < length \ - and pp[i].chtype==chunk_type(GROUP) \ - and len(pp[i].data) == 0: - del pp[i] - length = length-1 - - elif s(buf, ch.data) in markcmds.keys(): - length, newi = getnextarg(length, buf, pp, i) - str = flattext(buf, pp[i:newi]) - del pp[i:newi] - length = length - (newi - i) - ch.chtype = chunk_type(PLAIN) - markcmd = s(buf, ch.data) - x = markcmds[markcmd] - if type(x) == TupleType: - pre, after = x - str = pre+str+after - elif x == 1: - str = string.upper(str) - else: - raise 'FATAL', 'corrupt markcmds' - ch.data = str - else: - if s(buf, ch.data) not in ignoredcommands: - print 'WARNING: deleting command ' + `s(buf, ch.data)` - print 'PP' + `pp[i-1]` - del pp[i-1] - i, length = i-1, length-1 - elif ch.chtype == chunk_type(GROUP): - length, newi = getnextarg(length, buf, pp, i-1) - i = i-1 + pp = crcopy(pp) + ##print '---> FLATTEXT ' + `pp` + wobj = Wobj() + + i, length = 0, len(pp) + while 1: + if len(pp) != length: + raise 'FATAL', 'inconsistent length' + if i >= length: + break + ch = pp[i] + i = i+1 + if ch.chtype == chunk_type(PLAIN): + pass + elif ch.chtype == chunk_type(CSNAME): + if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves: + ch.chtype = chunk_type(PLAIN) + elif s(buf, ch.data) == 'e': + ch.chtype = chunk_type(PLAIN) + ch.data = '\\' + elif len(s(buf, ch.data)) == 1 \ + and s(buf, ch.data) in onlylatexspecial: + ch.chtype = chunk_type(PLAIN) + # if it is followed by an empty group, + # remove that group, it was needed for + # a true space + if i < length \ + and pp[i].chtype==chunk_type(GROUP) \ + and len(pp[i].data) == 0: + del pp[i] + length = length-1 + + elif s(buf, ch.data) in markcmds.keys(): + length, newi = getnextarg(length, buf, pp, i) + str = flattext(buf, pp[i:newi]) + del pp[i:newi] + length = length - (newi - i) + ch.chtype = chunk_type(PLAIN) + markcmd = s(buf, ch.data) + x = markcmds[markcmd] + if type(x) == TupleType: + pre, after = x + str = pre+str+after + elif x == 1: + str = string.upper(str) + else: + raise 'FATAL', 'corrupt markcmds' + ch.data = str + else: + if s(buf, ch.data) not in ignoredcommands: + print 'WARNING: deleting command ' + `s(buf, ch.data)` + print 'PP' + `pp[i-1]` + del pp[i-1] + i, length = i-1, length-1 + elif ch.chtype == chunk_type(GROUP): + length, newi = getnextarg(length, buf, pp, i-1) + i = i-1 ## str = flattext(buf, crcopy(pp[i-1:newi])) ## del pp[i:newi] ## length = length - (newi - i) ## ch.chtype = chunk_type(PLAIN) ## ch.data = str - else: - pass - - dumpit(buf, wobj.write, pp) - ##print 'FLATTEXT: RETURNING ' + `wobj.data` - return wobj.data + else: + pass + + dumpit(buf, wobj.write, pp) + ##print 'FLATTEXT: RETURNING ' + `wobj.data` + return wobj.data # try to generate node names (a bit shorter than the chapter title) # note that the \nodename command (see elsewhere) overules these efforts def invent_node_names(text): - words = string.split(text) - - ##print 'WORDS ' + `words` - - if len(words) == 2 \ - and string.lower(words[0]) == 'built-in' \ - and string.lower(words[1]) not in ('modules', 'functions'): - return words[1] - if len(words) == 3 and string.lower(words[1]) == 'module': - return words[2] - if len(words) == 3 and string.lower(words[1]) == 'object': - return string.join(words[0:2]) - if len(words) > 4 and string.lower(string.join(words[-4:])) == \ - 'methods and data attributes': - return string.join(words[:2]) - return text - + words = string.split(text) + + ##print 'WORDS ' + `words` + + if len(words) == 2 \ + and string.lower(words[0]) == 'built-in' \ + and string.lower(words[1]) not in ('modules', 'functions'): + return words[1] + if len(words) == 3 and string.lower(words[1]) == 'module': + return words[2] + if len(words) == 3 and string.lower(words[1]) == 'object': + return string.join(words[0:2]) + if len(words) > 4 and string.lower(string.join(words[-4:])) == \ + 'methods and data attributes': + return string.join(words[:2]) + return text + re_commas_etc = regex.compile('[,`\'@{}]') re_whitespace = regex.compile('[ \t]*') @@ -969,27 +961,27 @@ re_whitespace = regex.compile('[ \t]*') # Sometimes this is too much, maybe consider DENDLINE's as stop def next_command_p(length, buf, pp, i, cmdname): - while 1: - if i >= len(pp): - break - ch = pp[i] - i = i+1 - if ch.chtype == chunk_type(ENDLINE): - continue - if ch.chtype == chunk_type(DENDLINE): - continue - if ch.chtype == chunk_type(PLAIN): - if re_whitespace.search(s(buf, ch.data)) == 0 and \ - re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)): - continue - return -1 - if ch.chtype == chunk_type(CSNAME): - if s(buf, ch.data) == cmdname: - return i # _after_ the command - return -1 - return -1 - - + while 1: + if i >= len(pp): + break + ch = pp[i] + i = i+1 + if ch.chtype == chunk_type(ENDLINE): + continue + if ch.chtype == chunk_type(DENDLINE): + continue + if ch.chtype == chunk_type(PLAIN): + if re_whitespace.search(s(buf, ch.data)) == 0 and \ + re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)): + continue + return -1 + if ch.chtype == chunk_type(CSNAME): + if s(buf, ch.data) == cmdname: + return i # _after_ the command + return -1 + return -1 + + # things that are special to LaTeX, but not to texi.. onlylatexspecial = '_~^$#&%' @@ -999,17 +991,17 @@ hist = Struct() out = Struct() def startchange(): - global hist, out + global hist, out - hist.inenv = [] - hist.nodenames = [] - hist.cindex = [] - hist.inargs = 0 - hist.enumeratenesting, hist.itemizenesting = 0, 0 + hist.inenv = [] + hist.nodenames = [] + hist.cindex = [] + hist.inargs = 0 + hist.enumeratenesting, hist.itemizenesting = 0, 0 + + out.doublenodes = [] + out.doublecindeces = [] - out.doublenodes = [] - out.doublecindeces = [] - spacech = [chunk(PLAIN, 0, ' ')] commach = [chunk(PLAIN, 0, ', ')] @@ -1051,174 +1043,173 @@ enumeratesymbols = ['1', 'A', 'a'] ## or \funcline{NAME}{ARGS} ## def do_funcdesc(length, buf, pp, i): - startpoint = i-1 - ch = pp[startpoint] - wh = ch.where - length, newi = getnextarg(length, buf, pp, i) - funcname = chunk(GROUP, wh, pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - save = hist.inargs - hist.inargs = 1 - length, newi = getnextarg(length, buf, pp, i) - hist.inargs = save - del save - the_args = [chunk(PLAIN, wh, '()'[0])] + \ - pp[i:newi] + \ - [chunk(PLAIN, wh, '()'[1])] - del pp[i:newi] - length = length - (newi-i) - - idxsi = hist.indexsubitem # words - command = '' - cat_class = '' - if idxsi and idxsi[-1] in ('method', 'protocol'): - command = 'defmethod' - cat_class = string.join(idxsi[:-1]) - elif len(idxsi) == 2 and idxsi[1] == 'function': - command = 'deffn' - cat_class = string.join(idxsi) - elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: - command = 'deffn' - cat_class = 'function of ' + string.join(idxsi[1:]) - - if not command: - raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] - cslinearg.append(chunk(PLAIN, wh, ' ')) - cslinearg.append(funcname) - cslinearg.append(chunk(PLAIN, wh, ' ')) - l = len(cslinearg) - cslinearg[l:l] = the_args + startpoint = i-1 + ch = pp[startpoint] + wh = ch.where + length, newi = getnextarg(length, buf, pp, i) + funcname = chunk(GROUP, wh, pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + save = hist.inargs + hist.inargs = 1 + length, newi = getnextarg(length, buf, pp, i) + hist.inargs = save + del save + the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \ + [chunk(PLAIN, wh, '()'[1])] + del pp[i:newi] + length = length - (newi-i) + + idxsi = hist.indexsubitem # words + command = '' + cat_class = '' + if idxsi and idxsi[-1] in ('method', 'protocol'): + command = 'defmethod' + cat_class = string.join(idxsi[:-1]) + elif len(idxsi) == 2 and idxsi[1] == 'function': + command = 'deffn' + cat_class = string.join(idxsi) + elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: + command = 'deffn' + cat_class = 'function of ' + string.join(idxsi[1:]) + + if not command: + raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + cslinearg.append(chunk(PLAIN, wh, ' ')) + cslinearg.append(funcname) + cslinearg.append(chunk(PLAIN, wh, ' ')) + l = len(cslinearg) + cslinearg[l:l] = the_args + + pp.insert(i, chunk(GROUP, wh, cslinearg)) + i, length = i+1, length+1 + hist.command = command + return length, i - pp.insert(i, chunk(GROUP, wh, cslinearg)) - i, length = i+1, length+1 - hist.command = command - return length, i - ## this routine will be called on \begin{excdesc}{NAME} ## or \excline{NAME} ## def do_excdesc(length, buf, pp, i): - startpoint = i-1 - ch = pp[startpoint] - wh = ch.where - length, newi = getnextarg(length, buf, pp, i) - excname = chunk(GROUP, wh, pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - idxsi = hist.indexsubitem # words - command = '' - cat_class = '' - class_class = '' - if len(idxsi) == 2 and idxsi[1] == 'exception': - command = 'defvr' - cat_class = string.join(idxsi) - elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: - command = 'defcv' - cat_class = 'exception' - class_class = string.join(idxsi[1:]) - elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']: - command = 'defcv' - cat_class = 'exception' - class_class = string.join(idxsi[2:]) - - - if not command: - raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + startpoint = i-1 + ch = pp[startpoint] + wh = ch.where + length, newi = getnextarg(length, buf, pp, i) + excname = chunk(GROUP, wh, pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + idxsi = hist.indexsubitem # words + command = '' + cat_class = '' + class_class = '' + if len(idxsi) == 2 and idxsi[1] == 'exception': + command = 'defvr' + cat_class = string.join(idxsi) + elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: + command = 'defcv' + cat_class = 'exception' + class_class = string.join(idxsi[1:]) + elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']: + command = 'defcv' + cat_class = 'exception' + class_class = string.join(idxsi[2:]) + + + if not command: + raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + cslinearg.append(chunk(PLAIN, wh, ' ')) + if class_class: + cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) cslinearg.append(chunk(PLAIN, wh, ' ')) - if class_class: - cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) - cslinearg.append(chunk(PLAIN, wh, ' ')) - cslinearg.append(excname) + cslinearg.append(excname) - pp.insert(i, chunk(GROUP, wh, cslinearg)) - i, length = i+1, length+1 - hist.command = command - return length, i + pp.insert(i, chunk(GROUP, wh, cslinearg)) + i, length = i+1, length+1 + hist.command = command + return length, i ## same for datadesc or dataline... def do_datadesc(length, buf, pp, i): - startpoint = i-1 - ch = pp[startpoint] - wh = ch.where - length, newi = getnextarg(length, buf, pp, i) - dataname = chunk(GROUP, wh, pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - idxsi = hist.indexsubitem # words - command = '' - cat_class = '' - class_class = '' - if idxsi[-1] in ('attribute', 'option'): - command = 'defcv' - cat_class = idxsi[-1] - class_class = string.join(idxsi[:-1]) - elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: - command = 'defcv' - cat_class = 'data' - class_class = string.join(idxsi[1:]) - elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']: - command = 'defcv' - cat_class = 'data' - class_class = string.join(idxsi[2:]) - - - if not command: - raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + startpoint = i-1 + ch = pp[startpoint] + wh = ch.where + length, newi = getnextarg(length, buf, pp, i) + dataname = chunk(GROUP, wh, pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + idxsi = hist.indexsubitem # words + command = '' + cat_class = '' + class_class = '' + if idxsi[-1] in ('attribute', 'option'): + command = 'defcv' + cat_class = idxsi[-1] + class_class = string.join(idxsi[:-1]) + elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: + command = 'defcv' + cat_class = 'data' + class_class = string.join(idxsi[1:]) + elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']: + command = 'defcv' + cat_class = 'data' + class_class = string.join(idxsi[2:]) + + + if not command: + raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] + cslinearg.append(chunk(PLAIN, wh, ' ')) + if class_class: + cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) cslinearg.append(chunk(PLAIN, wh, ' ')) - if class_class: - cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) - cslinearg.append(chunk(PLAIN, wh, ' ')) - cslinearg.append(dataname) + cslinearg.append(dataname) + + pp.insert(i, chunk(GROUP, wh, cslinearg)) + i, length = i+1, length+1 + hist.command = command + return length, i - pp.insert(i, chunk(GROUP, wh, cslinearg)) - i, length = i+1, length+1 - hist.command = command - return length, i - # regular indices: those that are not set in tt font by default.... regindices = ('cindex', ) # remove illegal characters from node names def rm_commas_etc(text): - result = '' - changed = 0 - while 1: - pos = re_commas_etc.search(text) - if pos >= 0: - changed = 1 - result = result + text[:pos] - text = text[pos+1:] - else: - result = result + text - break - if changed: - print 'Warning: nodename changhed to ' + `result` + result = '' + changed = 0 + while 1: + pos = re_commas_etc.search(text) + if pos >= 0: + changed = 1 + result = result + text[:pos] + text = text[pos+1:] + else: + result = result + text + break + if changed: + print 'Warning: nodename changhed to ' + `result` + + return result - return result - # boolean flags flags = {'texi': 1} - + ## ## changeit: the actual routine, that changes the contents of the parsed @@ -1226,932 +1217,927 @@ flags = {'texi': 1} ## def changeit(buf, pp): - global onlylatexspecial, hist, out + global onlylatexspecial, hist, out + + i, length = 0, len(pp) + while 1: + # sanity check: length should always equal len(pp) + if len(pp) != length: + raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)` + if i >= length: + break + ch = pp[i] + i = i + 1 + + if type(ch) is StringType: + #normally, only chunks are present in pp, + # but in some cases, some extra info + # has been inserted, e.g., the \end{...} clauses + raise 'FATAL', 'got string, probably too many ' + `end` - i, length = 0, len(pp) - while 1: - # sanity check: length should always equal len(pp) - if len(pp) != length: - raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)` - if i >= length: - break - ch = pp[i] - i = i + 1 + if ch.chtype == chunk_type(GROUP): + # check for {\em ...} constructs + if ch.data and \ + ch.data[0].chtype == chunk_type(CSNAME) and \ + s(buf, ch.data[0].data) in fontchanges.keys(): + k = s(buf, ch.data[0].data) + del ch.data[0] + pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k])) + length, i = length+1, i+1 + + # recursively parse the contents of the group + changeit(buf, ch.data) + + elif ch.chtype == chunk_type(IF): + # \if... + flag, negate, data = ch.data + ##print 'IF: flag, negate = ' + `flag, negate` + if flag not in flags.keys(): + raise error, 'unknown flag ' + `flag` + + value = flags[flag] + if negate: + value = (not value) + del pp[i-1] + length, i = length-1, i-1 + if value: + pp[i:i] = data + length = length + len(data) + + + elif ch.chtype == chunk_type(ENV): + # \begin{...} .... + envname, data = ch.data + + #push this environment name on stack + hist.inenv.insert(0, envname) + + #append an endenv chunk after grouped data + data.append(chunk(ENDENV, ch.where, envname)) + ##[`data`] + + #delete this object + del pp[i-1] + i, length = i-1, length-1 + + #insert found data + pp[i:i] = data + length = length + len(data) + + if envname == 'verbatim': + pp[i:i] = [chunk(CSLINE, ch.where, 'example'), + chunk(GROUP, ch.where, [])] + length, i = length+2, i+2 + + elif envname == 'itemize': + if hist.itemizenesting > len(itemizesymbols): + raise error, 'too deep itemize nesting' + ingroupch = [chunk(CSNAME, ch.where, + itemizesymbols[hist.itemizenesting])] + hist.itemizenesting = hist.itemizenesting + 1 + pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'), + chunk(GROUP, ch.where, ingroupch)] + length, i = length+2, i+2 + + elif envname == 'enumerate': + if hist.enumeratenesting > len(enumeratesymbols): + raise error, 'too deep enumerate nesting' + ingroupch = [chunk(PLAIN, ch.where, + enumeratesymbols[hist.enumeratenesting])] + hist.enumeratenesting = hist.enumeratenesting + 1 + pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'), + chunk(GROUP, ch.where, ingroupch)] + length, i = length+2, i+2 + + elif envname == 'description': + ingroupch = [chunk(CSNAME, ch.where, 'b')] + pp[i:i] = [chunk(CSLINE, ch.where, 'table'), + chunk(GROUP, ch.where, ingroupch)] + length, i = length+2, i+2 + + elif (envname == 'tableiii') or (envname == 'tableii'): + if (envname == 'tableii'): + ltable = 2 + else: + ltable = 3 + wh = ch.where + newcode = [] + + #delete tabular format description + # e.g., {|l|c|l|} + length, newi = getnextarg(length, buf, pp, i) + del pp[i:newi] + length = length - (newi-i) + + newcode.append(chunk(CSLINE, wh, 'table')) + ingroupch = [chunk(CSNAME, wh, 'asis')] + newcode.append(chunk(GROUP, wh, ingroupch)) + newcode.append(chunk(CSLINE, wh, 'item')) + + #get the name of macro for @item + # e.g., {code} + length, newi = getnextarg(length, buf, pp, i) + + if newi-i != 1: + raise error, 'Sorry, expected 1 chunk argument' + if pp[i].chtype != chunk_type(PLAIN): + raise error, 'Sorry, expected plain text argument' + hist.itemargmacro = s(buf, pp[i].data) + del pp[i:newi] + length = length - (newi-i) + + itembody = [] + for count in range(ltable): + length, newi = getnextarg(length, buf, pp, i) + emphgroup = [ + chunk(CSNAME, wh, 'emph'), + chunk(GROUP, 0, pp[i:newi])] + del pp[i:newi] + length = length - (newi-i) + if count == 0: + itemarg = emphgroup + elif count == ltable-1: + itembody = itembody + \ + [chunk(PLAIN, wh, ' --- ')] + emphgroup + else: + itembody = emphgroup + newcode.append(chunk(GROUP, wh, itemarg)) + newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')] + pp[i:i] = newcode + l = len(newcode) + length, i = length+l, i+l + del newcode, l + + if length != len(pp): + raise 'STILL, SOMETHING wrong', `i` + + + elif envname == 'funcdesc': + pp.insert(i, chunk(PLAIN, ch.where, '')) + i, length = i+1, length+1 + length, i = do_funcdesc(length, buf, pp, i) + + elif envname == 'excdesc': + pp.insert(i, chunk(PLAIN, ch.where, '')) + i, length = i+1, length+1 + length, i = do_excdesc(length, buf, pp, i) + + elif envname == 'datadesc': + pp.insert(i, chunk(PLAIN, ch.where, '')) + i, length = i+1, length+1 + length, i = do_datadesc(length, buf, pp, i) + + else: + print 'WARNING: don\'t know what to do with env ' + `envname` + + elif ch.chtype == chunk_type(ENDENV): + envname = ch.data + if envname != hist.inenv[0]: + raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]` + del hist.inenv[0] + del pp[i-1] + i, length = i-1, length-1 + + if envname == 'verbatim': + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'example')])] + i, length = i+2, length+2 + elif envname == 'itemize': + hist.itemizenesting = hist.itemizenesting - 1 + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'itemize')])] + i, length = i+2, length+2 + elif envname == 'enumerate': + hist.enumeratenesting = hist.enumeratenesting-1 + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'enumerate')])] + i, length = i+2, length+2 + elif envname == 'description': + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'table')])] + i, length = i+2, length+2 + elif (envname == 'tableiii') or (envname == 'tableii'): + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, 'table')])] + i, length = i+2, length + 2 + pp.insert(i, chunk(DENDLINE, ch.where, '\n')) + i, length = i+1, length+1 + + elif envname in ('funcdesc', 'excdesc', 'datadesc'): + pp[i:i] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + else: + print 'WARNING: ending env ' + `envname` + 'has no actions' + + elif ch.chtype == chunk_type(CSNAME): + # control name transformations + if s(buf, ch.data) == 'optional': + pp[i-1].chtype = chunk_type (PLAIN) + pp[i-1].data = '[' + if (i < length) and \ + (pp[i].chtype == chunk_type(GROUP)): + cp=pp[i].data + pp[i:i+1]=cp + [ + chunk(PLAIN, ch.where, ']')] + length = length+len(cp) + elif s(buf, ch.data) in ignoredcommands: + del pp[i-1] + i, length = i-1, length-1 + elif s(buf, ch.data) == '@' and \ + i != length and \ + pp[i].chtype == chunk_type(PLAIN) and \ + s(buf, pp[i].data)[0] == '.': + # \@. --> \. --> @. + ch.data = '.' + del pp[i] + length = length-1 + elif s(buf, ch.data) == '\\': + # \\ --> \* --> @* + ch.data = '*' + elif len(s(buf, ch.data)) == 1 and \ + s(buf, ch.data) in onlylatexspecial: + ch.chtype = chunk_type(PLAIN) + # check if such a command is followed by + # an empty group: e.g., `\%{}'. If so, remove + # this empty group too + if i < length and \ + pp[i].chtype == chunk_type(GROUP) \ + and len(pp[i].data) == 0: + del pp[i] + length = length-1 + + elif hist.inargs and s(buf, ch.data) in inargsselves: + # This is the special processing of the + # arguments of the \begin{funcdesc}... or + # \funcline... arguments + # \, --> , \[ --> [, \] --> ] + ch.chtype = chunk_type(PLAIN) + + elif s(buf, ch.data) == 'renewcommand': + # \renewcommand{\indexsubitem}.... + i, length = i-1, length-1 + del pp[i] + length, newi = getnextarg(length, buf, pp, i) + if newi-i == 1 \ + and i < length \ + and pp[i].chtype == chunk_type(CSNAME) \ + and s(buf, pp[i].data) == 'indexsubitem': + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + text = flattext(buf, pp[i:newi]) + if text[:1] != '(' or text[-1:] != ')': + raise error, 'expected indexsubitme enclosed in braces' + words = string.split(text[1:-1]) + hist.indexsubitem = words + del text, words + else: + print 'WARNING: renewcommand with unsupported arg removed' + del pp[i:newi] + length = length - (newi-i) + + elif s(buf, ch.data) == 'item': + ch.chtype = chunk_type(CSLINE) + length, newi = getoptarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + i, length = i+1, length+1 + + elif s(buf, ch.data) == 'ttindex': + idxsi = hist.indexsubitem + + cat_class = '' + if len(idxsi) >= 2 and idxsi[1] in \ + ('method', 'function', 'protocol'): + command = 'findex' + elif len(idxsi) >= 2 and idxsi[1] in \ + ('exception', 'object'): + command = 'vindex' + else: + print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command' + command = 'cindex' - if type(ch) is StringType: - #normally, only chunks are present in pp, - # but in some cases, some extra info - # has been inserted, e.g., the \end{...} clauses - raise 'FATAL', 'got string, probably too many ' + `end` - - if ch.chtype == chunk_type(GROUP): - # check for {\em ...} constructs - if ch.data and \ - ch.data[0].chtype == chunk_type(CSNAME) and \ - s(buf, ch.data[0].data) in fontchanges.keys(): - k = s(buf, ch.data[0].data) - del ch.data[0] - pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k])) - length, i = length+1, i+1 - - # recursively parse the contents of the group - changeit(buf, ch.data) - - elif ch.chtype == chunk_type(IF): - # \if... - flag, negate, data = ch.data - ##print 'IF: flag, negate = ' + `flag, negate` - if flag not in flags.keys(): - raise error, 'unknown flag ' + `flag` - - value = flags[flag] - if negate: - value = (not value) - del pp[i-1] - length, i = length-1, i-1 - if value: - pp[i:i] = data - length = length + len(data) - - - elif ch.chtype == chunk_type(ENV): - # \begin{...} .... - envname, data = ch.data - - #push this environment name on stack - hist.inenv.insert(0, envname) - - #append an endenv chunk after grouped data - data.append(chunk(ENDENV, ch.where, envname)) - ##[`data`] - - #delete this object - del pp[i-1] - i, length = i-1, length-1 - - #insert found data - pp[i:i] = data - length = length + len(data) - - if envname == 'verbatim': - pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \ - chunk(GROUP, ch.where, [])] - length, i = length+2, i+2 - - elif envname == 'itemize': - if hist.itemizenesting > len(itemizesymbols): - raise error, 'too deep itemize nesting' - ingroupch = [chunk(CSNAME, ch.where,\ - itemizesymbols[hist.itemizenesting])] - hist.itemizenesting = hist.itemizenesting + 1 - pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\ - chunk(GROUP, ch.where, ingroupch)] - length, i = length+2, i+2 - - elif envname == 'enumerate': - if hist.enumeratenesting > len(enumeratesymbols): - raise error, 'too deep enumerate nesting' - ingroupch = [chunk(PLAIN, ch.where,\ - enumeratesymbols[hist.enumeratenesting])] - hist.enumeratenesting = hist.enumeratenesting + 1 - pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\ - chunk(GROUP, ch.where, ingroupch)] - length, i = length+2, i+2 - - elif envname == 'description': - ingroupch = [chunk(CSNAME, ch.where, 'b')] - pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \ - chunk(GROUP, ch.where, ingroupch)] - length, i = length+2, i+2 - - elif (envname == 'tableiii') or \ - (envname == 'tableii'): - if (envname == 'tableii'): - ltable = 2 - else: - ltable = 3 - wh = ch.where - newcode = [] - - #delete tabular format description - # e.g., {|l|c|l|} - length, newi = getnextarg(length, buf, pp, i) - del pp[i:newi] - length = length - (newi-i) - - newcode.append(chunk(CSLINE, wh, 'table')) - ingroupch = [chunk(CSNAME, wh, 'asis')] - newcode.append(chunk(GROUP, wh, ingroupch)) - newcode.append(chunk(CSLINE, wh, 'item')) - - #get the name of macro for @item - # e.g., {code} - length, newi = getnextarg(length, buf, pp, i) - - if newi-i != 1: - raise error, 'Sorry, expected 1 chunk argument' - if pp[i].chtype != chunk_type(PLAIN): - raise error, 'Sorry, expected plain text argument' - hist.itemargmacro = s(buf, pp[i].data) - del pp[i:newi] - length = length - (newi-i) - - itembody = [] - for count in range(ltable): - length, newi = getnextarg(length, buf, pp, i) - emphgroup = [\ - chunk(CSNAME, wh, 'emph'), \ - chunk(GROUP, 0, pp[i:newi])] - del pp[i:newi] - length = length - (newi-i) - if count == 0: - itemarg = emphgroup - elif count == ltable-1: - itembody = itembody + \ - [chunk(PLAIN, wh, ' --- ')] + \ - emphgroup - else: - itembody = emphgroup - newcode.append(chunk(GROUP, wh, itemarg)) - newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')] - pp[i:i] = newcode - l = len(newcode) - length, i = length+l, i+l - del newcode, l - - if length != len(pp): - raise 'STILL, SOMETHING wrong', `i` - - - elif envname == 'funcdesc': - pp.insert(i, chunk(PLAIN, ch.where, '')) - i, length = i+1, length+1 - length, i = do_funcdesc(length, buf, pp, i) - - elif envname == 'excdesc': - pp.insert(i, chunk(PLAIN, ch.where, '')) - i, length = i+1, length+1 - length, i = do_excdesc(length, buf, pp, i) - - elif envname == 'datadesc': - pp.insert(i, chunk(PLAIN, ch.where, '')) - i, length = i+1, length+1 - length, i = do_datadesc(length, buf, pp, i) - - else: - print 'WARNING: don\'t know what to do with env ' + `envname` - - elif ch.chtype == chunk_type(ENDENV): - envname = ch.data - if envname != hist.inenv[0]: - raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]` - del hist.inenv[0] - del pp[i-1] - i, length = i-1, length-1 - - if envname == 'verbatim': - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'example')])] - i, length = i+2, length+2 - elif envname == 'itemize': - hist.itemizenesting = hist.itemizenesting - 1 - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'itemize')])] - i, length = i+2, length+2 - elif envname == 'enumerate': - hist.enumeratenesting = hist.enumeratenesting-1 - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'enumerate')])] - i, length = i+2, length+2 - elif envname == 'description': - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'table')])] - i, length = i+2, length+2 - elif (envname == 'tableiii') or (envname == 'tableii'): - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, 'table')])] - i, length = i+2, length + 2 - pp.insert(i, chunk(DENDLINE, ch.where, '\n')) - i, length = i+1, length+1 - - elif envname in ('funcdesc', 'excdesc', 'datadesc'): - pp[i:i] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - else: - print 'WARNING: ending env ' + `envname` + 'has no actions' - - elif ch.chtype == chunk_type(CSNAME): - # control name transformations - if s(buf, ch.data) == 'optional': - pp[i-1].chtype = chunk_type (PLAIN) - pp[i-1].data = '[' - if (i < length) and \ - (pp[i].chtype == chunk_type(GROUP)): - cp=pp[i].data - pp[i:i+1]=cp + [\ - chunk(PLAIN, ch.where, ']')] - length = length+len(cp) - elif s(buf, ch.data) in ignoredcommands: - del pp[i-1] - i, length = i-1, length-1 - elif s(buf, ch.data) == '@' and \ - i != length and \ - pp[i].chtype == chunk_type(PLAIN) and \ - s(buf, pp[i].data)[0] == '.': - # \@. --> \. --> @. - ch.data = '.' - del pp[i] - length = length-1 - elif s(buf, ch.data) == '\\': - # \\ --> \* --> @* - ch.data = '*' - elif len(s(buf, ch.data)) == 1 and \ - s(buf, ch.data) in onlylatexspecial: - ch.chtype = chunk_type(PLAIN) - # check if such a command is followed by - # an empty group: e.g., `\%{}'. If so, remove - # this empty group too - if i < length and \ - pp[i].chtype == chunk_type(GROUP) \ - and len(pp[i].data) == 0: - del pp[i] - length = length-1 - - elif hist.inargs and s(buf, ch.data) in inargsselves: - # This is the special processing of the - # arguments of the \begin{funcdesc}... or - # \funcline... arguments - # \, --> , \[ --> [, \] --> ] - ch.chtype = chunk_type(PLAIN) - - elif s(buf, ch.data) == 'renewcommand': - # \renewcommand{\indexsubitem}.... - i, length = i-1, length-1 - del pp[i] - length, newi = getnextarg(length, buf, pp, i) - if newi-i == 1 \ - and i < length \ - and pp[i].chtype == chunk_type(CSNAME) \ - and s(buf, pp[i].data) == 'indexsubitem': - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - text = flattext(buf, pp[i:newi]) - if text[:1] != '(' or text[-1:] != ')': - raise error, 'expected indexsubitme enclosed in braces' - words = string.split(text[1:-1]) - hist.indexsubitem = words - del text, words - else: - print 'WARNING: renewcommand with unsupported arg removed' - del pp[i:newi] - length = length - (newi-i) - - elif s(buf, ch.data) == 'item': - ch.chtype = chunk_type(CSLINE) - length, newi = getoptarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - i, length = i+1, length+1 - - elif s(buf, ch.data) == 'ttindex': - idxsi = hist.indexsubitem - - cat_class = '' - if len(idxsi) >= 2 and idxsi[1] in \ - ('method', 'function', 'protocol'): - command = 'findex' - elif len(idxsi) >= 2 and idxsi[1] in \ - ('exception', 'object'): - command = 'vindex' - else: - print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command' - command = 'cindex' - - if not cat_class: - cat_class = '('+string.join(idxsi)+')' - - ch.chtype = chunk_type(CSLINE) - ch.data = command - - length, newi = getnextarg(length, buf, pp, i) - arg = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - cat_arg = [chunk(PLAIN, ch.where, cat_class)] - - # determine what should be set in roman, and - # what in tt-font - if command in regindices: - - arg = [chunk(CSNAME, ch.where, 't'), \ - chunk(GROUP, ch.where, arg)] - else: - cat_arg = [chunk(CSNAME, ch.where, 'r'), \ - chunk(GROUP, ch.where, cat_arg)] - - ingroupch = arg + \ - [chunk(PLAIN, ch.where, ' ')] + \ - cat_arg - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'ldots': - # \ldots --> \dots{} --> @dots{} - ch.data = 'dots' - if i == length \ - or pp[i].chtype != chunk_type(GROUP) \ - or pp[i].data != []: - pp.insert(i, chunk(GROUP, ch.where, [])) - i, length = i+1, length+1 - elif s(buf, ch.data) in wordsselves: - # \UNIX --> UNIX - ch.chtype = chunk_type(PLAIN) - if i != length \ - and pp[i].chtype == chunk_type(GROUP) \ - and pp[i].data == []: - del pp[i] - length = length-1 - elif s(buf, ch.data) in for_texi: - pass - - elif s(buf, ch.data) == 'e': - # "\e" --> "\" - ch.data = '\\' - ch.chtype = chunk_type(PLAIN) - elif (s(buf, ch.data) == 'lineiii') or\ - (s(buf, ch.data) == 'lineii'): - # This is the most tricky one - # \lineiii{a1}{a2}[{a3}] --> - # @item @<cts. of itemargmacro>{a1} - # a2 [ -- a3] - # - ##print 'LINEIIIIII!!!!!!!' + if not cat_class: + cat_class = '('+string.join(idxsi)+')' + + ch.chtype = chunk_type(CSLINE) + ch.data = command + + length, newi = getnextarg(length, buf, pp, i) + arg = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + cat_arg = [chunk(PLAIN, ch.where, cat_class)] + + # determine what should be set in roman, and + # what in tt-font + if command in regindices: + + arg = [chunk(CSNAME, ch.where, 't'), + chunk(GROUP, ch.where, arg)] + else: + cat_arg = [chunk(CSNAME, ch.where, 'r'), + chunk(GROUP, ch.where, cat_arg)] + + ingroupch = arg + \ + [chunk(PLAIN, ch.where, ' ')] + \ + cat_arg + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'ldots': + # \ldots --> \dots{} --> @dots{} + ch.data = 'dots' + if i == length \ + or pp[i].chtype != chunk_type(GROUP) \ + or pp[i].data != []: + pp.insert(i, chunk(GROUP, ch.where, [])) + i, length = i+1, length+1 + elif s(buf, ch.data) in wordsselves: + # \UNIX --> UNIX + ch.chtype = chunk_type(PLAIN) + if i != length \ + and pp[i].chtype == chunk_type(GROUP) \ + and pp[i].data == []: + del pp[i] + length = length-1 + elif s(buf, ch.data) in for_texi: + pass + + elif s(buf, ch.data) == 'e': + # "\e" --> "\" + ch.data = '\\' + ch.chtype = chunk_type(PLAIN) + elif (s(buf, ch.data) == 'lineiii') or\ + (s(buf, ch.data) == 'lineii'): + # This is the most tricky one + # \lineiii{a1}{a2}[{a3}] --> + # @item @<cts. of itemargmacro>{a1} + # a2 [ -- a3] + # + ##print 'LINEIIIIII!!!!!!!' ## wobj = Wobj() ## dumpit(buf, wobj.write, pp[i-1:i+5]) ## print '--->' + wobj.data + '<----' - if not hist.inenv: - raise error, \ - 'no environment for lineiii' - if (hist.inenv[0] != 'tableiii') and\ - (hist.inenv[0] != 'tableii'): - raise error, \ - 'wrong command (' + \ - s(buf, ch.data)+ \ - ') in wrong environment (' \ - + `hist.inenv[0]` + ')' - ch.chtype = chunk_type(CSLINE) - ch.data = 'item' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = [chunk(CSNAME, 0, \ - hist.itemargmacro), \ - chunk(GROUP, 0, pp[i:newi])] - del pp[i:newi] - length = length - (newi-i) + if not hist.inenv: + raise error, 'no environment for lineiii' + if (hist.inenv[0] != 'tableiii') and \ + (hist.inenv[0] != 'tableii'): + raise error, \ + 'wrong command (' + \ + s(buf, ch.data)+ \ + ') in wrong environment (' \ + + `hist.inenv[0]` + ')' + ch.chtype = chunk_type(CSLINE) + ch.data = 'item' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = [chunk(CSNAME, 0, + hist.itemargmacro), + chunk(GROUP, 0, pp[i:newi])] + del pp[i:newi] + length = length - (newi-i) ## print 'ITEM ARG: --->', ## wobj = Wobj() ## dumpit(buf, wobj.write, ingroupch) ## print wobj.data, '<---' - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - grouppos = i - i, length = i+1, length+1 - length, i = getnextarg(length, buf, pp, i) - length, newi = getnextarg(length, buf, pp, i) - if newi > i: - # we have a 3rd arg - pp.insert(i, chunk(PLAIN, ch.where, ' --- ')) - i = newi + 1 - length = length + 1 + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + grouppos = i + i, length = i+1, length+1 + length, i = getnextarg(length, buf, pp, i) + length, newi = getnextarg(length, buf, pp, i) + if newi > i: + # we have a 3rd arg + pp.insert(i, chunk(PLAIN, ch.where, ' --- ')) + i = newi + 1 + length = length + 1 ## pp[grouppos].data = pp[grouppos].data \ ## + [chunk(PLAIN, ch.where, ' ')] \ ## + pp[i:newi] ## del pp[i:newi] ## length = length - (newi-i) - if length != len(pp): - raise 'IN LINEIII IS THE ERR', `i` - - elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'): - #\xxxsection{A} ----> - # @node A, , , - # @xxxsection A - ## also: remove commas and quotes - ch.chtype = chunk_type(CSLINE) - length, newi = getnextarg(length, buf, pp, i) - afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') - if afternodenamecmd < 0: - cp1 = crcopy(pp[i:newi]) - pp[i:newi] = [\ - chunk(GROUP, ch.where, \ - pp[i:newi])] - length, newi = length - (newi-i) + 1, \ - i+1 - text = flattext(buf, cp1) - text = invent_node_names(text) - else: - length, endarg = getnextarg(length, buf, pp, afternodenamecmd) - cp1 = crcopy(pp[afternodenamecmd:endarg]) - del pp[newi:endarg] - length = length - (endarg-newi) - - pp[i:newi] = [\ - chunk(GROUP, ch.where, \ - pp[i:newi])] - length, newi = length - (newi-i) + 1, \ - i + 1 - text = flattext(buf, cp1) - if text[-1] == '.': - text = text[:-1] + if length != len(pp): + raise 'IN LINEIII IS THE ERR', `i` + + elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'): + #\xxxsection{A} ----> + # @node A, , , + # @xxxsection A + ## also: remove commas and quotes + ch.chtype = chunk_type(CSLINE) + length, newi = getnextarg(length, buf, pp, i) + afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') + if afternodenamecmd < 0: + cp1 = crcopy(pp[i:newi]) + pp[i:newi] = [ + chunk(GROUP, ch.where, + pp[i:newi])] + length, newi = length - (newi-i) + 1, i+1 + text = flattext(buf, cp1) + text = invent_node_names(text) + else: + length, endarg = getnextarg(length, buf, pp, afternodenamecmd) + cp1 = crcopy(pp[afternodenamecmd:endarg]) + del pp[newi:endarg] + length = length - (endarg-newi) + + pp[i:newi] = [ + chunk(GROUP, ch.where, + pp[i:newi])] + length, newi = length - (newi-i) + 1, i + 1 + text = flattext(buf, cp1) + if text[-1] == '.': + text = text[:-1] ## print 'FLATTEXT:', `text` - if text in hist.nodenames: - print 'WARNING: node name ' + `text` + ' already used' - out.doublenodes.append(text) - else: - hist.nodenames.append(text) - text = rm_commas_etc(text) - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'node'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, text+', , ,')\ - ])] - i, length = newi+2, length+2 - - elif s(buf,ch.data) == 'funcline': - # fold it to a very short environment - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - length, i = do_funcdesc(length, buf, pp, i) - - elif s(buf,ch.data) == 'dataline': - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - length, i = do_datadesc(length, buf, pp, i) - - elif s(buf,ch.data) == 'excline': - pp[i-1:i-1] = [\ - chunk(CSLINE, ch.where, 'end'), \ - chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, hist.command)])] - i, length = i+2, length+2 - length, i = do_excdesc(length, buf, pp, i) - - - elif s(buf, ch.data) == 'index': - #\index{A} ---> - # @cindex A - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - length, newi = getnextarg(length, buf, pp, i) - - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - elif s(buf, ch.data) == 'bifuncindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'findex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(built-in function)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'obindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'findex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(object)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'opindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'findex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(operator)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'bimodindex': - ch.chtype = chunk_type(CSLINE) - ch.data = 'pindex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(built-in)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - elif s(buf, ch.data) == 'sectcode': - ch.data = 'code' - - - elif s(buf, ch.data) == 'stmodindex': - ch.chtype = chunk_type(CSLINE) - # use the program index as module index - ch.data = 'pindex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = pp[i:newi] - del pp[i:newi] - length = length - (newi-i) - - ingroupch.append(chunk(PLAIN, ch.where, ' ')) - ingroupch.append(chunk(CSNAME, ch.where, 'r')) - ingroupch.append(chunk(GROUP, ch.where, [\ - chunk(PLAIN, ch.where, \ - '(standard)')])) - - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - length, i = length+1, i+1 - - - elif s(buf, ch.data) == 'stindex': - # XXX must actually go to newindex st - wh = ch.where - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - length, newi = getnextarg(length, buf, pp, i) - ingroupch = [chunk(CSNAME, wh, 'code'), \ - chunk(GROUP, wh, pp[i:newi])] - - del pp[i:newi] - length = length - (newi-i) - - t = ingroupch[:] - t.append(chunk(PLAIN, wh, ' statement')) - - pp.insert(i, chunk(GROUP, wh, t)) - i, length = i+1, length+1 - - pp.insert(i, chunk(CSLINE, wh, 'cindex')) - i, length = i+1, length+1 - - t = ingroupch[:] - t.insert(0, chunk(PLAIN, wh, 'statement, ')) - - pp.insert(i, chunk(GROUP, wh, t)) - i, length = i+1, length+1 - - - elif s(buf, ch.data) == 'indexii': - #\indexii{A}{B} ---> - # @cindex A B - # @cindex B, A - length, newi = getnextarg(length, buf, pp, i) - cp11 = pp[i:newi] - cp21 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp12 = pp[i:newi] - cp22 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ - chunk(PLAIN, ch.where, ' ')] + cp12)) - i, length = i+1, length+1 - pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ - chunk(GROUP, ch.where, cp22 + [\ - chunk(PLAIN, ch.where, ', ')]+ cp21)] - i, length = i+2, length+2 - - elif s(buf, ch.data) == 'indexiii': - length, newi = getnextarg(length, buf, pp, i) - cp11 = pp[i:newi] - cp21 = crcopy(pp[i:newi]) - cp31 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp12 = pp[i:newi] - cp22 = crcopy(pp[i:newi]) - cp32 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp13 = pp[i:newi] - cp23 = crcopy(pp[i:newi]) - cp33 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ - chunk(PLAIN, ch.where, ' ')] + cp12 \ - + [chunk(PLAIN, ch.where, ' ')] \ - + cp13)) - i, length = i+1, length+1 - pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ - chunk(GROUP, ch.where, cp22 + [\ - chunk(PLAIN, ch.where, ' ')]+ cp23\ - + [chunk(PLAIN, ch.where, ', ')] +\ - cp21)] - i, length = i+2, length+2 - pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ - chunk(GROUP, ch.where, cp33 + [\ - chunk(PLAIN, ch.where, ', ')]+ cp31\ - + [chunk(PLAIN, ch.where, ' ')] +\ - cp32)] - i, length = i+2, length+2 - - - elif s(buf, ch.data) == 'indexiv': - length, newi = getnextarg(length, buf, pp, i) - cp11 = pp[i:newi] - cp21 = crcopy(pp[i:newi]) - cp31 = crcopy(pp[i:newi]) - cp41 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp12 = pp[i:newi] - cp22 = crcopy(pp[i:newi]) - cp32 = crcopy(pp[i:newi]) - cp42 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp13 = pp[i:newi] - cp23 = crcopy(pp[i:newi]) - cp33 = crcopy(pp[i:newi]) - cp43 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - length, newi = getnextarg(length, buf, pp, i) - cp14 = pp[i:newi] - cp24 = crcopy(pp[i:newi]) - cp34 = crcopy(pp[i:newi]) - cp44 = crcopy(pp[i:newi]) - del pp[i:newi] - length = length - (newi-i) - - ch.chtype = chunk_type(CSLINE) - ch.data = 'cindex' - ingroupch = cp11 + \ - spacech + cp12 + \ - spacech + cp13 + \ - spacech + cp14 - pp.insert(i, chunk(GROUP, ch.where, ingroupch)) - i, length = i+1, length+1 - ingroupch = cp22 + \ - spacech + cp23 + \ - spacech + cp24 + \ - commach + cp21 - pp[i:i] = cindexch + [\ - chunk(GROUP, ch.where, ingroupch)] - i, length = i+2, length+2 - ingroupch = cp33 + \ - spacech + cp34 + \ - commach + cp31 + \ - spacech + cp32 - pp[i:i] = cindexch + [\ - chunk(GROUP, ch.where, ingroupch)] - i, length = i+2, length+2 - ingroupch = cp44 + \ - commach + cp41 + \ - spacech + cp42 + \ - spacech + cp43 - pp[i:i] = cindexch + [\ - chunk(GROUP, ch.where, ingroupch)] - i, length = i+2, length+2 - - - - else: - print 'don\'t know what to do with keyword ' + `s(buf, ch.data)` - - - + if text in hist.nodenames: + print 'WARNING: node name ' + `text` + ' already used' + out.doublenodes.append(text) + else: + hist.nodenames.append(text) + text = rm_commas_etc(text) + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'node'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, text+', , ,') + ])] + i, length = newi+2, length+2 + + elif s(buf,ch.data) == 'funcline': + # fold it to a very short environment + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + length, i = do_funcdesc(length, buf, pp, i) + + elif s(buf,ch.data) == 'dataline': + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + length, i = do_datadesc(length, buf, pp, i) + + elif s(buf,ch.data) == 'excline': + pp[i-1:i-1] = [ + chunk(CSLINE, ch.where, 'end'), + chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, hist.command)])] + i, length = i+2, length+2 + length, i = do_excdesc(length, buf, pp, i) + + + elif s(buf, ch.data) == 'index': + #\index{A} ---> + # @cindex A + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + length, newi = getnextarg(length, buf, pp, i) + + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + elif s(buf, ch.data) == 'bifuncindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'findex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(built-in function)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'obindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'findex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(object)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'opindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'findex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(operator)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'bimodindex': + ch.chtype = chunk_type(CSLINE) + ch.data = 'pindex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(built-in)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + elif s(buf, ch.data) == 'sectcode': + ch.data = 'code' + + + elif s(buf, ch.data) == 'stmodindex': + ch.chtype = chunk_type(CSLINE) + # use the program index as module index + ch.data = 'pindex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = pp[i:newi] + del pp[i:newi] + length = length - (newi-i) + + ingroupch.append(chunk(PLAIN, ch.where, ' ')) + ingroupch.append(chunk(CSNAME, ch.where, 'r')) + ingroupch.append(chunk(GROUP, ch.where, [ + chunk(PLAIN, ch.where, + '(standard)')])) + + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + length, i = length+1, i+1 + + + elif s(buf, ch.data) == 'stindex': + # XXX must actually go to newindex st + wh = ch.where + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + length, newi = getnextarg(length, buf, pp, i) + ingroupch = [chunk(CSNAME, wh, 'code'), + chunk(GROUP, wh, pp[i:newi])] + + del pp[i:newi] + length = length - (newi-i) + + t = ingroupch[:] + t.append(chunk(PLAIN, wh, ' statement')) + + pp.insert(i, chunk(GROUP, wh, t)) + i, length = i+1, length+1 + + pp.insert(i, chunk(CSLINE, wh, 'cindex')) + i, length = i+1, length+1 + + t = ingroupch[:] + t.insert(0, chunk(PLAIN, wh, 'statement, ')) + + pp.insert(i, chunk(GROUP, wh, t)) + i, length = i+1, length+1 + + + elif s(buf, ch.data) == 'indexii': + #\indexii{A}{B} ---> + # @cindex A B + # @cindex B, A + length, newi = getnextarg(length, buf, pp, i) + cp11 = pp[i:newi] + cp21 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp12 = pp[i:newi] + cp22 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + pp.insert(i, chunk(GROUP, ch.where, cp11 + [ + chunk(PLAIN, ch.where, ' ')] + cp12)) + i, length = i+1, length+1 + pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), + chunk(GROUP, ch.where, cp22 + [ + chunk(PLAIN, ch.where, ', ')]+ cp21)] + i, length = i+2, length+2 + + elif s(buf, ch.data) == 'indexiii': + length, newi = getnextarg(length, buf, pp, i) + cp11 = pp[i:newi] + cp21 = crcopy(pp[i:newi]) + cp31 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp12 = pp[i:newi] + cp22 = crcopy(pp[i:newi]) + cp32 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp13 = pp[i:newi] + cp23 = crcopy(pp[i:newi]) + cp33 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + pp.insert(i, chunk(GROUP, ch.where, cp11 + [ + chunk(PLAIN, ch.where, ' ')] + cp12 + + [chunk(PLAIN, ch.where, ' ')] + + cp13)) + i, length = i+1, length+1 + pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), + chunk(GROUP, ch.where, cp22 + [ + chunk(PLAIN, ch.where, ' ')]+ cp23 + + [chunk(PLAIN, ch.where, ', ')] + + cp21)] + i, length = i+2, length+2 + pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), + chunk(GROUP, ch.where, cp33 + [ + chunk(PLAIN, ch.where, ', ')]+ cp31 + + [chunk(PLAIN, ch.where, ' ')] + + cp32)] + i, length = i+2, length+2 + + + elif s(buf, ch.data) == 'indexiv': + length, newi = getnextarg(length, buf, pp, i) + cp11 = pp[i:newi] + cp21 = crcopy(pp[i:newi]) + cp31 = crcopy(pp[i:newi]) + cp41 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp12 = pp[i:newi] + cp22 = crcopy(pp[i:newi]) + cp32 = crcopy(pp[i:newi]) + cp42 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp13 = pp[i:newi] + cp23 = crcopy(pp[i:newi]) + cp33 = crcopy(pp[i:newi]) + cp43 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + length, newi = getnextarg(length, buf, pp, i) + cp14 = pp[i:newi] + cp24 = crcopy(pp[i:newi]) + cp34 = crcopy(pp[i:newi]) + cp44 = crcopy(pp[i:newi]) + del pp[i:newi] + length = length - (newi-i) + + ch.chtype = chunk_type(CSLINE) + ch.data = 'cindex' + ingroupch = cp11 + \ + spacech + cp12 + \ + spacech + cp13 + \ + spacech + cp14 + pp.insert(i, chunk(GROUP, ch.where, ingroupch)) + i, length = i+1, length+1 + ingroupch = cp22 + \ + spacech + cp23 + \ + spacech + cp24 + \ + commach + cp21 + pp[i:i] = cindexch + [ + chunk(GROUP, ch.where, ingroupch)] + i, length = i+2, length+2 + ingroupch = cp33 + \ + spacech + cp34 + \ + commach + cp31 + \ + spacech + cp32 + pp[i:i] = cindexch + [ + chunk(GROUP, ch.where, ingroupch)] + i, length = i+2, length+2 + ingroupch = cp44 + \ + commach + cp41 + \ + spacech + cp42 + \ + spacech + cp43 + pp[i:i] = cindexch + [ + chunk(GROUP, ch.where, ingroupch)] + i, length = i+2, length+2 + + + + else: + print 'don\'t know what to do with keyword ' + `s(buf, ch.data)` + + + re_atsign = regex.compile('[@{}]') re_newline = regex.compile('\n') def dumpit(buf, wm, pp): - global out - - i, length = 0, len(pp) + global out - addspace = 0 - - while 1: - if len(pp) != length: - raise 'FATAL', 'inconsistent length' - if i == length: - break - ch = pp[i] - i = i + 1 + i, length = 0, len(pp) - if addspace: - dospace = 1 - addspace = 0 - else: - dospace = 0 - - if ch.chtype == chunk_type(CSNAME): - wm('@' + s(buf, ch.data)) - if s(buf, ch.data) == 'node' and \ - pp[i].chtype == chunk_type(PLAIN) and \ - s(buf, pp[i].data) in out.doublenodes: - ##XXX doesnt work yet?? - wm(' ZZZ-' + zfill(`i`, 4)) - if s(buf, ch.data)[0] in string.letters: - addspace = 1 - elif ch.chtype == chunk_type(PLAIN): - if dospace and s(buf, ch.data) not in (' ', '\t'): - wm(' ') - text = s(buf, ch.data) - while 1: - pos = re_atsign.search(text) - if pos < 0: - break - wm(text[:pos] + '@' + text[pos]) - text = text[pos+1:] - wm(text) - elif ch.chtype == chunk_type(GROUP): - wm('{') - dumpit(buf, wm, ch.data) - wm('}') - elif ch.chtype == chunk_type(DENDLINE): - wm('\n\n') - while i != length and pp[i].chtype in \ - (chunk_type(DENDLINE), chunk_type(ENDLINE)): - i = i + 1 - elif ch.chtype == chunk_type(OTHER): - wm(s(buf, ch.data)) - elif ch.chtype == chunk_type(ACTIVE): - wm(s(buf, ch.data)) - elif ch.chtype == chunk_type(ENDLINE): - wm('\n') - elif ch.chtype == chunk_type(CSLINE): - if i >= 2 and pp[i-2].chtype not in \ - (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ - and (pp[i-2].chtype != chunk_type(PLAIN) \ - or s(buf, pp[i-2].data)[-1] != '\n'): - - wm('\n') - wm('@' + s(buf, ch.data)) - if i == length: - raise error, 'CSLINE expected another chunk' - if pp[i].chtype != chunk_type(GROUP): - raise error, 'CSLINE expected GROUP' - if type(pp[i].data) != ListType: - raise error, 'GROUP chould contain []-data' - - wobj = Wobj() - dumpit(buf, wobj.write, pp[i].data) - i = i + 1 - text = wobj.data - del wobj - if text: - wm(' ') - while 1: - pos = re_newline.search(text) - if pos < 0: - break - print 'WARNING: found newline in csline arg' - wm(text[:pos] + ' ') - text = text[pos+1:] - wm(text) - if i >= length or \ - pp[i].chtype not in (chunk_type(CSLINE), \ - chunk_type(ENDLINE), chunk_type(DENDLINE)) \ - and (pp[i].chtype != chunk_type(PLAIN) \ - or s(buf, pp[i].data)[0] != '\n'): - wm('\n') - - elif ch.chtype == chunk_type(COMMENT): + addspace = 0 + + while 1: + if len(pp) != length: + raise 'FATAL', 'inconsistent length' + if i == length: + break + ch = pp[i] + i = i + 1 + + if addspace: + dospace = 1 + addspace = 0 + else: + dospace = 0 + + if ch.chtype == chunk_type(CSNAME): + wm('@' + s(buf, ch.data)) + if s(buf, ch.data) == 'node' and \ + pp[i].chtype == chunk_type(PLAIN) and \ + s(buf, pp[i].data) in out.doublenodes: + ##XXX doesnt work yet?? + wm(' ZZZ-' + zfill(`i`, 4)) + if s(buf, ch.data)[0] in string.letters: + addspace = 1 + elif ch.chtype == chunk_type(PLAIN): + if dospace and s(buf, ch.data) not in (' ', '\t'): + wm(' ') + text = s(buf, ch.data) + while 1: + pos = re_atsign.search(text) + if pos < 0: + break + wm(text[:pos] + '@' + text[pos]) + text = text[pos+1:] + wm(text) + elif ch.chtype == chunk_type(GROUP): + wm('{') + dumpit(buf, wm, ch.data) + wm('}') + elif ch.chtype == chunk_type(DENDLINE): + wm('\n\n') + while i != length and pp[i].chtype in \ + (chunk_type(DENDLINE), chunk_type(ENDLINE)): + i = i + 1 + elif ch.chtype == chunk_type(OTHER): + wm(s(buf, ch.data)) + elif ch.chtype == chunk_type(ACTIVE): + wm(s(buf, ch.data)) + elif ch.chtype == chunk_type(ENDLINE): + wm('\n') + elif ch.chtype == chunk_type(CSLINE): + if i >= 2 and pp[i-2].chtype not in \ + (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ + and (pp[i-2].chtype != chunk_type(PLAIN) + or s(buf, pp[i-2].data)[-1] != '\n'): + + wm('\n') + wm('@' + s(buf, ch.data)) + if i == length: + raise error, 'CSLINE expected another chunk' + if pp[i].chtype != chunk_type(GROUP): + raise error, 'CSLINE expected GROUP' + if type(pp[i].data) != ListType: + raise error, 'GROUP chould contain []-data' + + wobj = Wobj() + dumpit(buf, wobj.write, pp[i].data) + i = i + 1 + text = wobj.data + del wobj + if text: + wm(' ') + while 1: + pos = re_newline.search(text) + if pos < 0: + break + print 'WARNING: found newline in csline arg' + wm(text[:pos] + ' ') + text = text[pos+1:] + wm(text) + if i >= length or \ + pp[i].chtype not in (chunk_type(CSLINE), + chunk_type(ENDLINE), chunk_type(DENDLINE)) \ + and (pp[i].chtype != chunk_type(PLAIN) + or s(buf, pp[i].data)[0] != '\n'): + wm('\n') + + elif ch.chtype == chunk_type(COMMENT): ## print 'COMMENT: previous chunk =', pp[i-2] ## if pp[i-2].chtype == chunk_type(PLAIN): ## print 'PLAINTEXT =', `s(buf, pp[i-2].data)` - if s(buf, ch.data) and \ - regex.match('^[ \t]*$', s(buf, ch.data)) < 0: - if i >= 2 and pp[i-2].chtype not in \ - (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ - and not (pp[i-2].chtype == chunk_type(PLAIN) \ - and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0): - print 'ADDING NEWLINE' - wm('\n') - wm('@c ' + s(buf, ch.data)) - elif ch.chtype == chunk_type(IGNORE): - pass - else: - try: - str = `s(buf, ch.data)` - except TypeError: - str = `ch.data` - if len(str) > 400: - str = str[:400] + '...' - print 'warning:', ch.chtype, 'not handled, data ' + str + if s(buf, ch.data) and \ + regex.match('^[ \t]*$', s(buf, ch.data)) < 0: + if i >= 2 and pp[i-2].chtype not in \ + (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ + and not (pp[i-2].chtype == chunk_type(PLAIN) + and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0): + print 'ADDING NEWLINE' + wm('\n') + wm('@c ' + s(buf, ch.data)) + elif ch.chtype == chunk_type(IGNORE): + pass + else: + try: + str = `s(buf, ch.data)` + except TypeError: + str = `ch.data` + if len(str) > 400: + str = str[:400] + '...' + print 'warning:', ch.chtype, 'not handled, data ' + str def main(): - outfile = None - headerfile = 'texipre.dat' - trailerfile = 'texipost.dat' - - try: - opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:') - except getopt.error: - args = [] - - if not args: - print 'usage: partparse [-o outfile] [-h headerfile]', - print '[-t trailerfile] file ...' - sys.exit(2) - - for opt, arg in opts: - if opt == '-o': outfile = arg - if opt == '-h': headerfile = arg - if opt == '-t': trailerfile = arg - - if not outfile: - root, ext = os.path.splitext(args[0]) - outfile = root + '.texi' - - if outfile in args: - print 'will not overwrite input file', outfile - sys.exit(2) - - outf = open(outfile, 'w') - outf.write(open(headerfile, 'r').read()) - - for file in args: - if len(args) > 1: print '='*20, file, '='*20 - buf = open(file, 'r').read() - w, pp = parseit(buf) - startchange() - changeit(buf, pp) - dumpit(buf, outf.write, pp) - - outf.write(open(trailerfile, 'r').read()) - - outf.close() + outfile = None + headerfile = 'texipre.dat' + trailerfile = 'texipost.dat' + + try: + opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:') + except getopt.error: + args = [] + + if not args: + print 'usage: partparse [-o outfile] [-h headerfile]', + print '[-t trailerfile] file ...' + sys.exit(2) + + for opt, arg in opts: + if opt == '-o': outfile = arg + if opt == '-h': headerfile = arg + if opt == '-t': trailerfile = arg + + if not outfile: + root, ext = os.path.splitext(args[0]) + outfile = root + '.texi' + + if outfile in args: + print 'will not overwrite input file', outfile + sys.exit(2) + + outf = open(outfile, 'w') + outf.write(open(headerfile, 'r').read()) + + for file in args: + if len(args) > 1: print '='*20, file, '='*20 + buf = open(file, 'r').read() + w, pp = parseit(buf) + startchange() + changeit(buf, pp) + dumpit(buf, outf.write, pp) + + outf.write(open(trailerfile, 'r').read()) + + outf.close() if __name__ == "__main__": main() |