Last active
June 2, 2022 17:53
-
-
Save AnotherTwinkle/e048aa91f1b9cabd0decac036ca04e57 to your computer and use it in GitHub Desktop.
An incredibly obfuscated Markov Chain.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class MarkovChain(object):
    '''Word-level Markov chain text generator.

    The model is built from a list of words kept in their original order.
    Every window of ``length`` consecutive words becomes one entry: the
    first word is the key and the remaining ``length - 1`` words are one
    possible continuation of that key.
    '''

    def tuplemaker(self, words, length):
        '''Creates the entries for the training dictionary, tuple[0]
        is the key and the rest are values.

        One window is yielded for every start position except the last
        one. Windows that run past the end of ``words`` are yielded
        truncated, so the trailing entries can be shorter than ``length``.
        '''
        # Slicing never raises IndexError, so no try/except is needed here.
        # Using yield because this list tends to get very big.
        for start in range(len(words) - 1):
            yield tuple(words[start:start + length])

    def dictmaker(self, tuples: list):
        '''Creates the dictionary from the tuples we generated earlier.

        Arguments:
            tuples :- The entries to index. Any iterable works; ``generate``
                      passes the generator returned by ``tuplemaker``.

        Returns a dict mapping each entry's first word to the list of
        tuples (entry[1:]) that followed it, in order of appearance and
        with duplicates kept so frequent continuations are picked more often.
        '''
        dictionary = {}
        for tup in tuples:
            # setdefault creates the list on first sight of the key and
            # returns the existing list afterwards.
            dictionary.setdefault(tup[0], []).append(tup[1:])
        return dictionary

    def generate(self, words: list, length: int = 2, size: int = 100):
        '''Generate our markov chain. You only need to call this.

        Arguments:
            words  :- A list of words sorted in order of appearance in the
                      training paragraph. Using this on randomly sorted
                      words without grammar won't really work.
            length :- Length of each "entry". 2 means a normal markov chain.
                      The bigger this number is, the less random the output
                      gets, until the chain mostly reproduces snippets of
                      the input text.
            size   :- The amount of words to return.

        Returns:
            The generated words joined by single spaces. The result holds
            exactly ``size`` words, except when ``words`` has fewer than
            ``length`` words: then nothing can be chained and the input
            itself (at most ``size`` words) is returned. An empty ``words``
            list or a non-positive ``size`` gives an empty string.

        Raises:
            AttributeError: if ``length`` is smaller than 2.

        When the chain reaches a point with no known continuation (for
        example the final words of the training text) it is restarted from
        a fresh starting word instead of failing.
        '''
        # 2 is a normal, default markov chain, each entry is a pair,
        # i.e. {'come': 'here'}. AttributeError is kept (rather than
        # ValueError) so existing callers that catch it keep working.
        if length < 2:
            raise AttributeError('Value of argument "length" must be at least 2')
        if size <= 0 or not words:
            return ''

        worddict = self.dictmaker(self.tuplemaker(words, length))

        # Keep only full-length continuations. The truncated entries from
        # the tail of the text cannot be matched against a full context.
        tail_len = length - 1
        continuations = {}
        for key, tails in worddict.items():
            full = [tail for tail in tails if len(tail) == tail_len]
            if full:
                continuations[key] = full

        if not continuations:
            # The text is shorter than one entry, there is nothing to chain.
            return ' '.join(words[:size])

        # We want to start with a capital lettered word, but only one that
        # can actually be continued. word[:1] is safe for empty strings.
        usable = [word for word in words if word in continuations]
        capitalised = [word for word in usable if word[:1].isupper()]
        starters = capitalised or usable

        def seed():
            # A start word followed by one of its known continuations.
            first = random.choice(starters)
            return [first] + list(random.choice(continuations[first]))

        # Kickstart the chain
        chain = seed()
        while len(chain) < size:
            # The last length-1 words decide the next word: the first of
            # them is the lookup key, the others must match the beginning
            # of the stored continuation.
            pivot = len(chain) - tail_len
            context = tuple(chain[pivot + 1:])
            valid = [tail
                     for tail in continuations.get(chain[pivot], ())
                     if tail[:-1] == context]
            if valid:
                chain.append(random.choice(valid)[-1])
            else:
                # Dead end: restart from a fresh seed rather than crash.
                chain.extend(seed())
        # A seed adds several words at once, so trim to the requested size.
        return ' '.join(chain[:size])
# Example usage: MarkovChain().generate(words, 3, 200)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment