diff options
Diffstat (limited to 'Demo/scripts/markov.py')
-rwxr-xr-x | Demo/scripts/markov.py | 64 |
1 files changed, 34 insertions, 30 deletions
diff --git a/Demo/scripts/markov.py b/Demo/scripts/markov.py index 7a4fc01..df4dec0 100755 --- a/Demo/scripts/markov.py +++ b/Demo/scripts/markov.py @@ -5,11 +5,10 @@ class Markov: self.histsize = histsize self.choice = choice self.trans = {} + def add(self, state, next): - if not self.trans.has_key(state): - self.trans[state] = [next] - else: - self.trans[state].append(next) + self.trans.setdefault(state, []).append(next) + def put(self, seq): n = self.histsize add = self.add @@ -17,26 +16,29 @@ class Markov: for i in range(len(seq)): add(seq[max(0, i-n):i], seq[i:i+1]) add(seq[len(seq)-n:], None) + def get(self): choice = self.choice trans = self.trans n = self.histsize seq = choice(trans[None]) - while 1: + while True: subseq = seq[max(0, len(seq)-n):] options = trans[subseq] next = choice(options) - if not next: break - seq = seq + next + if not next: + break + seq += next return seq + def test(): - import sys, string, random, getopt + import sys, random, getopt args = sys.argv[1:] try: - opts, args = getopt.getopt(args, '0123456789cdw') + opts, args = getopt.getopt(args, '0123456789cdwq') except getopt.error: - print 'Usage: markov [-#] [-cddqw] [file] ...' + print 'Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0] print 'Options:' print '-#: 1-digit history size (default 2)' print '-c: characters (default)' @@ -49,16 +51,19 @@ def test(): print 'exactly one space separating words.' print 'Output consists of paragraphs separated by blank' print 'lines, where lines are no longer than 72 characters.' + sys.exit(2) histsize = 2 - do_words = 0 + do_words = False debug = 1 for o, a in opts: - if '-0' <= o <= '-9': histsize = eval(o[1:]) - if o == '-c': do_words = 0 - if o == '-d': debug = debug + 1 + if '-0' <= o <= '-9': histsize = int(o[1:]) + if o == '-c': do_words = False + if o == '-d': debug += 1 if o == '-q': debug = 0 - if o == '-w': do_words = 1 - if not args: args = ['-'] + if o == '-w': do_words = True + if not args: + args = ['-'] + m = Markov(histsize, random.choice) try: for filename in args: @@ -72,13 +77,15 @@ def test(): if debug: print 'processing', filename, '...' text = f.read() f.close() - paralist = string.splitfields(text, '\n\n') + paralist = text.split('\n\n') for para in paralist: if debug > 1: print 'feeding ...' - words = string.split(para) + words = para.split() if words: - if do_words: data = tuple(words) - else: data = string.joinfields(words, ' ') + if do_words: + data = tuple(words) + else: + data = ' '.join(words) m.put(data) except KeyboardInterrupt: print 'Interrupted -- continue with data read so far' @@ -86,16 +93,19 @@ def test(): print 'No valid input files' return if debug: print 'done.' + if debug > 1: for key in m.trans.keys(): if key is None or len(key) < histsize: print repr(key), m.trans[key] if histsize == 0: print repr(''), m.trans[''] print - while 1: + while True: data = m.get() - if do_words: words = data - else: words = string.split(data) + if do_words: + words = data + else: + words = data.split() n = 0 limit = 72 for w in words: @@ -103,15 +113,9 @@ def test(): print n = 0 print w, - n = n + len(w) + 1 + n += len(w) + 1 print print -def tuple(list): - if len(list) == 0: return () - if len(list) == 1: return (list[0],) - i = len(list)//2 - return tuple(list[:i]) + tuple(list[i:]) - if __name__ == "__main__": test() |