diff options
author | Guido van Rossum <guido@python.org> | 1993-12-14 10:08:02 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1993-12-14 10:08:02 (GMT) |
commit | 6930b3d18da1ce43d5a0c8099df66b8c59490465 (patch) | |
tree | e16dde1268eee9ad4c2eb3c5b9c21a6a6ec60753 /Demo/scripts/markov.py | |
parent | c4801ed5139ac860dece357af59d3fb522084e1c (diff) | |
download | cpython-6930b3d18da1ce43d5a0c8099df66b8c59490465.zip cpython-6930b3d18da1ce43d5a0c8099df66b8c59490465.tar.gz cpython-6930b3d18da1ce43d5a0c8099df66b8c59490465.tar.bz2 |
Initial revision
Diffstat (limited to 'Demo/scripts/markov.py')
-rwxr-xr-x | Demo/scripts/markov.py | 116 |
1 files changed, 116 insertions, 0 deletions
#! /usr/bin/env python3
"""Markov-chain text generator.

Reads paragraphs from the named files (or stdin), records which item
follows each short history of items, and then prints an endless stream of
random paragraphs drawn from those statistics.  Items are characters by
default, or whole words with -w.
"""

import getopt
import random
import sys
import textwrap

# Only the model class is reusable API; ``test`` is the command-line
# driver and is deliberately left out of star-imports.
__all__ = ['Markov']

_USAGE = """\
Usage: markov [-#] [-cddqw] [file] ...
Options:
-#: 1-digit history size (default 2)
-c: characters (default)
-w: words
-d: more debugging output
-q: no debugging output
Input files (default stdin) are split in paragraphs
separated blank lines and each paragraph is split
in words by whitespace, then reconcatenated with
exactly one space separating words.
Output consists of paragraphs separated by blank
lines, where lines are no longer than 72 characters."""

# Maximum width of an output line.
_LINE_LIMIT = 72


class Markov:
    """Markov model over sliceable, hashable sequences (str or tuple).

    histsize -- number of trailing items that make up a state.
    choice   -- callable that picks one element from a non-empty list
                (e.g. random.choice).

    ``trans`` maps each state to the list of items seen after it.  The
    state ``None`` holds the empty starting sequences, and a follower of
    ``None`` marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that *next* was observed right after *state*."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence (a str or a tuple) into the model."""
        n = self.histsize
        add = self.add
        # seq[:0] is the empty sequence of seq's own type, so get() can
        # start from it and grow with "+" regardless of str/tuple.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp at 0: a negative start would select only a suffix of a
        # sequence shorter than the history and record the wrong final
        # state, which get() could then never find.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Return one randomly generated sequence.

        Raises KeyError if nothing has been put() into the model yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            item = choice(trans[state])
            if not item:
                # The None end-marker was drawn: the sequence is complete.
                break
            seq = seq + item
        return seq


def _feed(model, text, do_words, debug):
    """Split *text* into paragraphs and put each non-empty one into *model*."""
    for para in text.split('\n\n'):
        if debug > 1:
            print('feeding ...')
        words = para.split()
        if words:
            model.put(tuple(words) if do_words else ' '.join(words))


def test():
    """Command-line driver: train on the input files, then print forever.

    Returns 2 after printing the usage message for bad options; otherwise
    returns None (or never returns, since generation is endless).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], '0123456789cdqw')
    except getopt.error:
        print(_USAGE)
        return 2

    histsize = 2
    do_words = False
    debug = 1
    for opt, _ in opts:
        flag = opt[1:]
        if flag.isdigit():
            histsize = int(flag)
        elif flag == 'c':
            do_words = False
        elif flag == 'd':
            debug = debug + 1
        elif flag == 'q':
            debug = 0
        elif flag == 'w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                f = sys.stdin
            else:
                f = open(filename, 'r')
            try:
                if debug:
                    print('processing', filename, '...')
                text = f.read()
            finally:
                # Leave stdin open so a repeated "-" does not fail on a
                # closed file.
                if f is not sys.stdin:
                    f.close()
            _feed(m, text, do_words, debug)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')

    if not m.trans:
        print('No valid input files')
        return None
    if debug:
        print('done.')
    if debug > 1:
        # Show only the "short" states, i.e. those at a sequence start.
        for key, followers in m.trans.items():
            if key is None or len(key) < histsize:
                print(repr(key), followers)
        if histsize == 0:
            # With no history the single state is the empty sequence,
            # whose type depends on the mode.
            empty = () if do_words else ''
            print(repr(empty), m.trans[empty])
        print()

    while True:
        data = m.get()
        words = data if do_words else data.split()
        # Greedy fill; over-long words get a line of their own instead
        # of being split.
        print(textwrap.fill(' '.join(words), width=_LINE_LIMIT,
                            break_long_words=False,
                            break_on_hyphens=False))
        print()


if __name__ == '__main__':
    sys.exit(test())