Skip to content

Instantly share code, notes, and snippets.

@AnotherTwinkle
Last active June 2, 2022 17:53
Show Gist options
  • Save AnotherTwinkle/e048aa91f1b9cabd0decac036ca04e57 to your computer and use it in GitHub Desktop.
Save AnotherTwinkle/e048aa91f1b9cabd0decac036ca04e57 to your computer and use it in GitHub Desktop.
An incredibly obfuscated Markov Chain.
import random
class MarkovChain(object):
    '''Word-level Markov chain text generator.

    The class keeps no state. ``generate`` is the entry point; ``tuplemaker``
    and ``dictmaker`` are the training helpers it calls.
    '''

    def tuplemaker(self, words, length):
        '''Yield every run of ``length`` consecutive words as a tuple.

        tuple[0] later becomes the key of the training dictionary and the
        rest of the tuple becomes one of its values.

        Only complete windows are produced. Slicing past the end of a
        sequence returns a shorter slice rather than raising IndexError, so
        the loop bound (not an exception handler) is what keeps truncated
        tuples out of the training data.

        Arguments:
            words  :- Sequence of words in their order of appearance.
            length :- Number of words in each tuple.
        '''
        # A generator, because this gets very big for large training texts.
        for start in range(len(words) - length + 1):
            yield tuple(words[start:start + length])

    def dictmaker(self, tuples):
        '''Create the training dictionary from the tuples generated earlier.

        Arguments:
            tuples :- Any iterable of non-empty tuples (a generator is fine).

        Returns a dict mapping each first word to a list holding the rest of
        every tuple that started with it. Repeated tuples are kept, so a
        continuation that appears more often is more likely to be picked.
        '''
        dictionary = {}
        for tup in tuples:
            # setdefault creates the entry on first sight and appends after.
            dictionary.setdefault(tup[0], []).append(tup[1:])
        return dictionary

    def generate(self, words: list, length: int = 2, size: int = 100) -> str:
        '''Generate our markov chain. You only need to call this.

        Arguments:
            words  :- A list of words sorted in order of appearance in the
                      training paragraph. Using this on randomly sorted words
                      without grammar won't really work.
            length :- Length of each "entry". 2 means a normal markov chain.
                      The bigger this number is, the less random the output
                      gets; around 7-8 the algorithm can't find random
                      matches anymore and directly outputs snippets from the
                      input text.
            size   :- The maximum amount of words to return.

        Returns the generated words joined by single spaces. The result has
        exactly ``size`` words unless the chain reaches a context that never
        continues in ``words`` (e.g. the very end of the text), in which case
        generation stops early. An empty string is returned when ``size`` is
        not positive or ``words`` holds fewer than ``length`` words.

        Raises:
            AttributeError :- if ``length`` is smaller than 2. (ValueError
                              would be more conventional; the type is kept so
                              existing callers keep working.)
        '''
        # 2 is a normal, default markov chain: each entry is a pair,
        # i.e. {'come': [('here',)]}
        if length < 2:
            raise AttributeError('Value of argument "length" must be at least 2')

        worddict = self.dictmaker(self.tuplemaker(words, length))
        if not worddict or size <= 0:
            return ''

        # Only words that open a complete window are keys of worddict, so the
        # start word is drawn from those positions. We want to start with a
        # capital lettered word when one exists; word[:1] keeps an empty
        # string from raising.
        window_count = len(words) - length + 1
        candidates = words[:window_count]
        openers = [word for word in candidates if word[:1].isupper()]
        first = random.choice(openers if openers else candidates)

        # Kickstart the chain with one full entry.
        chain = [first]
        chain.extend(random.choice(worddict[first]))

        while len(chain) < size:
            # The last (length - 1) words are the context: the first of them
            # is the dictionary key, the others must match the beginning of
            # a stored entry for that entry's final word to be a valid
            # continuation.
            context = chain[-(length - 1):]
            key, middle = context[0], tuple(context[1:])
            followers = [tail[-1]
                         for tail in worddict.get(key, ())
                         if tail[:-1] == middle]
            if not followers:
                break  # Dead end: nothing ever followed this context.
            chain.append(random.choice(followers))

        # The kickstart entry alone may already exceed a small size.
        return ' '.join(chain[:size])
# Example usage: MarkovChain().generate(words, 3, 200)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment