Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

markov.py@ 3298

Visit:

Last change on this file since 3298 was 3225, checked in by bird, 19 years ago
Python 2.5
File size: 3.7 KB

Line
1	#! /usr/bin/env python
2
class Markov:
    """Markov-chain model over sliceable, hashable sequences.

    Sequences fed to put() are sliced and the slices are used as
    dictionary keys, so they must support slicing, concatenation with
    ``+`` and hashing (e.g. strings or tuples).

    Attributes:
        histsize: maximum number of preceding elements used as the state.
        choice: callable that picks one item from a non-empty list
            (e.g. ``random.choice``).
        trans: dict mapping a state to the list of successors observed
            after it.  The key ``None`` holds the possible starting
            values; a successor of ``None`` marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed following `state`.

        Duplicates are kept on purpose: a successor that was seen more
        often appears more often in the list handed to `choice`.
        """
        # setdefault replaces the dict.has_key() test, which no longer
        # exists in Python 3, and needs only a single dictionary lookup.
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete sequence into the transition table."""
        n = self.histsize
        add = self.add
        # The empty prefix is the value get() starts from.
        add(None, seq[:0])
        for i in range(len(seq)):
            # State: up to n elements before position i.
            # Successor: the one-element slice at position i.
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # End marker, keyed by the trailing history of the sequence.
        add(seq[len(seq) - n:], None)

    def get(self):
        """Generate and return one sequence from the recorded transitions.

        Starts from a value chosen among the entries stored under the
        ``None`` key and keeps appending chosen successors until a falsy
        successor (the ``None`` end marker or an empty slice) is drawn.

        Raises:
            KeyError: if no start entry exists, i.e. put() was never
                called on this model.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            # Current state: the last n elements generated so far.
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                break
            seq = seq + successor
        return seq
32
33	def test():
34	import sys, string, random, getopt
35	args = sys.argv[1:]
36	try:
37	opts, args = getopt.getopt(args, '0123456789cdw')
38	except getopt.error:
39	print 'Usage: markov [-#] [-cddqw] [file] ...'
40	print 'Options:'
41	print '-#: 1-digit history size (default 2)'
42	print '-c: characters (default)'
43	print '-w: words'
44	print '-d: more debugging output'
45	print '-q: no debugging output'
46	print 'Input files (default stdin) are split in paragraphs'
47	print 'separated blank lines and each paragraph is split'
48	print 'in words by whitespace, then reconcatenated with'
49	print 'exactly one space separating words.'
50	print 'Output consists of paragraphs separated by blank'
51	print 'lines, where lines are no longer than 72 characters.'
52	histsize = 2
53	do_words = 0
54	debug = 1
55	for o, a in opts:
56	if '-0' <= o <= '-9': histsize = eval(o[1:])
57	if o == '-c': do_words = 0
58	if o == '-d': debug = debug + 1
59	if o == '-q': debug = 0
60	if o == '-w': do_words = 1
61	if not args: args = ['-']
62	m = Markov(histsize, random.choice)
63	try:
64	for filename in args: