Lib/idlelib/FormatParagraph.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148

# Extension to format a paragraph

# Does basic, standard text formatting, and also understands Python
# comment blocks.  Thus, for editing Python source code, this
# extension is really only suitable for reformatting these comment
# blocks or triple-quoted strings.

# Known problems with comment reformatting:
# * If there is a selection marked, and the first line of the
#   selection is not complete, the block will probably not be detected
#   as comments, and will have the normal "text formatting" rules
#   applied.
# * If a comment block has leading whitespace that mixes tabs and
#   spaces, they will not be considered part of the same block.
# * Fancy comments, like this bulleted list, arent handled :-)

import re
from .configHandler import idleConf

class FormatParagraph:

    menudefs = [
        ('format', [   # /s/edit/format   dscherer@cmu.edu
            ('Format Paragraph', '<<format-paragraph>>'),
         ])
    ]

    def __init__(self, editwin):
        self.editwin = editwin

    def close(self):
        self.editwin = None

    def format_paragraph_event(self, event):
        maxformatwidth = int(idleConf.GetOption('main','FormatParagraph','paragraph'))
        text = self.editwin.text
        first, last = self.editwin.get_selection_indices()
        if first and last:
            data = text.get(first, last)
            comment_header = ''
        else:
            first, last, comment_header, data = \
                    find_paragraph(text, text.index("insert"))
        if comment_header:
            # Reformat the comment lines - convert to text sans header.
            lines = data.split("\n")
            lines = map(lambda st, l=len(comment_header): st[l:], lines)
            data = "\n".join(lines)
            # Reformat to maxformatwidth chars or a 20 char width, whichever is greater.
            format_width = max(maxformatwidth - len(comment_header), 20)
            newdata = reformat_paragraph(data, format_width)
            # re-split and re-insert the comment header.
            newdata = newdata.split("\n")
            # If the block ends in a \n, we dont want the comment
            # prefix inserted after it. (Im not sure it makes sense to
            # reformat a comment block that isnt made of complete
            # lines, but whatever!)  Can't think of a clean soltution,
            # so we hack away
            block_suffix = ""
            if not newdata[-1]:
                block_suffix = "\n"
                newdata = newdata[:-1]
            builder = lambda item, prefix=comment_header: prefix+item
            newdata = '\n'.join(map(builder, newdata)) + block_suffix
        else:
            # Just a normal text format
            newdata = reformat_paragraph(data, maxformatwidth)
        text.tag_remove("sel", "1.0", "end")
        if newdata != data:
            text.mark_set("insert", first)
            text.undo_block_start()
            text.delete(first, last)
            text.insert(first, newdata)
            text.undo_block_stop()
        else:
            text.mark_set("insert", last)
        text.see("insert")

def find_paragraph(text, mark):
    lineno, col = map(int, mark.split("."))
    line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)
    while text.compare("%d.0" % lineno, "<", "end") and is_all_white(line):
        lineno = lineno + 1
        line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)
    first_lineno = lineno
    comment_header = get_comment_header(line)
    comment_header_len = len(comment_header)
    while get_comment_header(line)==comment_header and \
              not is_all_white(line[comment_header_len:]):
        lineno = lineno + 1
        line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)
    last = "%d.0" % lineno
    # Search back to beginning of paragraph
    lineno = first_lineno - 1
    line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)
    while lineno > 0 and \
              get_comment_header(line)==comment_header and \
              not is_all_white(line[comment_header_len:]):
        lineno = lineno - 1
        line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)
    first = "%d.0" % (lineno+1)
    return first, last, comment_header, text.get(first, last)

def reformat_paragraph(data, limit):
    lines = data.split("\n")
    i = 0
    n = len(lines)
    while i < n and is_all_white(lines[i]):
        i = i+1
    if i >= n:
        return data
    indent1 = get_indent(lines[i])
    if i+1 < n and not is_all_white(lines[i+1]):
        indent2 = get_indent(lines[i+1])
    else:
        indent2 = indent1
    new = lines[:i]
    partial = indent1
    while i < n and not is_all_white(lines[i]):
        # XXX Should take double space after period (etc.) into account
        words = re.split("(\s+)", lines[i])
        for j in range(0, len(words), 2):
            word = words[j]
            if not word:
                continue # Can happen when line ends in whitespace
            if len((partial + word).expandtabs()) > limit and \
               partial != indent1:
                new.append(partial.rstrip())
                partial = indent2
            partial = partial + word + " "
            if j+1 < len(words) and words[j+1] != " ":
                partial = partial + " "
        i = i+1
    new.append(partial.rstrip())
    # XXX Should reformat remaining paragraphs as well
    new.extend(lines[i:])
    return "\n".join(new)

def is_all_white(line):
    return re.match(r"^\s*$", line) is not None

def get_indent(line):
    return re.match(r"^(\s*)", line).group()

def get_comment_header(line):
    m = re.match(r"^(\s*#*)", line)
    if m is None: return ""
    return m.group(1)