import random
import pickle


class markov(dict):
    """Markov chain of a fixed order, stored as a tree of nested chains.

    A chain of order ``n`` maps a word to a chain of order ``n - 1``; a chain
    of order 0 maps a word to the number of times it was added.  ``num`` is
    the total number of sequences recorded below a node, so it doubles as
    that node's weight when its parent picks a random key.

    All data lives in ``self.d``.  The ``dict`` base class itself is never
    written to; only ``__getitem__`` is redirected to ``self.d``.
    """

    def __init__(self, order):
        super(markov, self).__init__()
        self.order = order  # number of context words preceding each word
        self.num = 0        # number of sequences recorded in this (sub)chain
        self.d = {}         # word -> sub-chain (order > 0) or count (order 0)

    def __repr__(self):
        return repr({"order": self.order, "num": self.num, "data": self.d})

    def __getitem__(self, x):
        return self.d[x]

    def _pick_key(self):
        """Return a random key of ``self.d`` weighted by frequency.

        The weight of a key is its count (order 0) or the ``num`` of its
        sub-chain (order > 0).  Returns None when the chain is empty.
        """
        if self.num <= 0:
            return None
        r = random.randint(0, self.num - 1)
        for key, entry in self.d.items():
            weight = entry if self.order == 0 else entry.num
            if r < weight:
                return key
            r -= weight
        return None

    def addword(self, list):
        """Record ``list[-1]`` as following the context words ``list[:-1]``.

        ``list`` must hold exactly ``order + 1`` words; sequences of any
        other length are silently ignored.  (The parameter name shadows the
        builtin but is kept for compatibility with existing callers.)
        """
        if len(list) != self.order + 1:
            return
        self.num += 1
        head = list[0]
        if self.order == 0:
            self.d[head] = self.d.get(head, 0) + 1
        else:
            if head not in self.d:
                self.d[head] = markov(self.order - 1)
            self.d[head].addword(list[1:])

    def randomstart(self):
        """Return ``order`` random context words suitable for ``next()``.

        Words are drawn with probability proportional to how often they were
        recorded.  An order-0 chain needs no context, so it yields ``[]``.

        Raises:
            ValueError: if the chain has order >= 1 and contains no data.
        """
        if self.order == 0:
            return []
        key = self._pick_key()
        if key is None:
            raise ValueError("cannot pick a random start from an empty chain")
        if self.order > 1:
            return [key] + self.d[key].randomstart()
        return [key]

    def next(self, list):
        """Choose a random word to follow the context words in ``list``.

        Returns None when ``list`` is longer than the chain's order, when the
        context was never recorded, or when the chain is empty.
        """
        if len(list) > self.order:
            return None
        if len(list) == 0:
            return self._pick_key()
        if list[0] not in self.d:
            return None
        return self.d[list[0]].next(list[1:])

    def getline(self, min_words=5):
        """Return random text generated between two newline tokens.

        The newline tokens themselves are not included; the words are joined
        with single spaces.  Lines shorter than ``min_words`` are discarded
        and generation is retried, so this loops until a long enough line is
        produced.  Returns None when no newline token has been recorded as a
        leading context word.

        The chain must have order >= 1; an order-0 chain is not supported.
        """
        if '\n' not in self.d:
            return None
        ret = []
        while len(ret) < min_words:
            # Start from a context that begins right after a line break.
            rs = self.d['\n'].randomstart()
            rs.insert(0, '\n')
            # Slide the window forward until it no longer ends on a break.
            while rs[-1] == '\n':
                rs.append(self.next(rs))
                del rs[0]
            # Keep only the context words that belong to the current line.
            ret = rs[:-1]
            while '\n' in ret:
                del ret[0:ret.index('\n') + 1]
            # Emit words until the chain dead-ends or the line terminates.
            while rs[-1] is not None and rs[-1] != '\n':
                ret.append(rs[-1])
                rs.append(self.next(rs))
                del rs[0]
        return ' '.join(ret)

    def dump(self, filename):
        """Pickle this chain to ``filename``."""
        # Pickle data is binary; the file is closed even if dumping fails.
        with open(filename, 'wb') as handle:
            pickle.dump(self, handle)

    @staticmethod
    def load(filename):
        """Return the chain previously written to ``filename`` by ``dump``.

        SECURITY: unpickling can execute arbitrary code, so only load files
        that come from a trusted source.
        """
        with open(filename, 'rb') as handle:
            return pickle.load(handle)