AnotherTwinkle · June 2, 2022 17:53
diff --git a/shitpost_markov_chain.py b/shitpost_markov_chain.py
 import random

 class MarkovChain(object):
 	'''Word-level Markov chain text generator.

 	generate() is the entry point. tuplemaker() and dictmaker() build the
 	lookup table that generate() samples from.
 	'''

 	def tuplemaker(self,words,length):
 		'''Lazily yield every run of "length" consecutive words as a tuple.

 		tuple[0] later becomes a key of the training dictionary and the rest
 		of the tuple becomes one of its values.

 		Only complete windows are produced. Slicing past the end of a list
 		returns a shorter slice instead of raising IndexError, so the range is
 		bounded here rather than relying on an exception handler. If "words"
 		holds fewer than "length" items nothing is yielded.
 		'''

 		#A generator, because this sequence tends to get very big
 		for start in range(len(words) - length + 1):
 			yield tuple(words[start:start + length])

 	def dictmaker(self,tuples: list):
 		'''Create the training dictionary from the tuples generated earlier.

 		Arguments:
 			tuples :- Any iterable of non-empty tuples; a list works, and so does
 				the generator returned by tuplemaker.

 		Returns a dict that maps the first word of each tuple to a list holding
 		the remainder of every tuple that starts with that word. Duplicates are
 		kept, so a continuation that occurs more often is picked more often by
 		random.choice.
 		'''
 		dictionary = {}

 		for tup in tuples:
 			#Create the entry on first sight of the word, then append to it
 			dictionary.setdefault(tup[0], []).append(tup[1:])

 		return dictionary

 	def generate(self,words: list, length: int = 2, size: int = 100):
 		'''Generate our markov chain. You only need to call this.

 		Arguments:
 			words :- A list of words in order of appearance in the training
 				paragraph. Using this on randomly sorted words without grammar
 				won't really work.

 			length :- Length of each "entry". 2 means a normal markov chain,
 				where each entry is a pair, i.e. {'come': 'here'}. The bigger
 				this number is, the less random the output gets; around 7-8 the
 				algorithm can't find random matches anymore and directly outputs
 				snippets from the input text.
 				(Tested on 4 harry potter books joined together.)

 			size :- The size of the markov chain, i.e. the number of words to
 				return.

 		Returns:
 			A string of exactly "size" words separated by single spaces, or an
 			empty string when "size" is 0 or negative.

 		Raises:
 			AttributeError :- "length" is smaller than 2.
 			ValueError :- "words" holds fewer than "length" words, so not even
 				one entry can be built.
 		'''

 		if length < 2:
 			raise AttributeError('Value of argument "length" must be at least 2')

 		worddict = self.dictmaker(self.tuplemaker(words,length))
 		if not worddict:
 			raise ValueError('Argument "words" must contain at least "length" words')

 		#Only a word that has a continuation can start the chain. Picking from
 		#"words" (not from the keys) keeps frequent words proportionally likely.
 		starts = [word for word in words if word in worddict]
 		#We want to start with a capital lettered word when there is one.
 		#word[:1] instead of word[0] so an empty string does not crash.
 		capitalised = [word for word in starts if word[:1].isupper()]
 		starts = capitalised or starts

 		chain = []
 		while len(chain) < size:
 			candidates = []
 			if chain:
 				#The key is the word "length - 1" places from the end; the words
 				#after it are the context that a stored entry has to match.
 				pivot = len(chain) - (length - 1)
 				context = tuple(chain[pivot + 1:])
 				candidates = [tup
 					for tup in worddict.get(chain[pivot], ())
 					if tup[:-1] == context]

 			if candidates:
 				chain.append(random.choice(candidates)[-1])
 			else:
 				#Either the chain is still empty or it ran into a dead end (the
 				#tail of the training text with no recorded continuation).
 				#Kickstart a fresh run instead of crashing.
 				seed = random.choice(starts)
 				chain.append(seed)
 				chain.extend(random.choice(worddict[seed]))

 		#A kickstart adds "length" words at once, so trim any overshoot
 		return ' '.join(chain[:size])

 	
 #Example usage: MarkovChain().generate(words,3,200)
	import random

	class MarkovChain(object):
		'''Word-level Markov chain text generator.

		generate() is the entry point. tuplemaker() and dictmaker() build the
		lookup table that generate() samples from.
		'''

		def tuplemaker(self,words,length):
			'''Lazily yield every run of "length" consecutive words as a tuple.

			tuple[0] later becomes a key of the training dictionary and the rest
			of the tuple becomes one of its values.

			Only complete windows are produced. Slicing past the end of a list
			returns a shorter slice instead of raising IndexError, so the range is
			bounded here rather than relying on an exception handler. If "words"
			holds fewer than "length" items nothing is yielded.
			'''

			#A generator, because this sequence tends to get very big
			for start in range(len(words) - length + 1):
				yield tuple(words[start:start + length])

		def dictmaker(self,tuples: list):
			'''Create the training dictionary from the tuples generated earlier.

			Arguments:
				tuples :- Any iterable of non-empty tuples; a list works, and so does
					the generator returned by tuplemaker.

			Returns a dict that maps the first word of each tuple to a list holding
			the remainder of every tuple that starts with that word. Duplicates are
			kept, so a continuation that occurs more often is picked more often by
			random.choice.
			'''
			dictionary = {}

			for tup in tuples:
				#Create the entry on first sight of the word, then append to it
				dictionary.setdefault(tup[0], []).append(tup[1:])

			return dictionary

		def generate(self,words: list, length: int = 2, size: int = 100):
			'''Generate our markov chain. You only need to call this.

			Arguments:
				words :- A list of words in order of appearance in the training
					paragraph. Using this on randomly sorted words without grammar
					won't really work.

				length :- Length of each "entry". 2 means a normal markov chain,
					where each entry is a pair, i.e. {'come': 'here'}. The bigger
					this number is, the less random the output gets; around 7-8 the
					algorithm can't find random matches anymore and directly outputs
					snippets from the input text.
					(Tested on 4 harry potter books joined together.)

				size :- The size of the markov chain, i.e. the number of words to
					return.

			Returns:
				A string of exactly "size" words separated by single spaces, or an
				empty string when "size" is 0 or negative.

			Raises:
				AttributeError :- "length" is smaller than 2.
				ValueError :- "words" holds fewer than "length" words, so not even
					one entry can be built.
			'''

			if length < 2:
				raise AttributeError('Value of argument "length" must be at least 2')

			worddict = self.dictmaker(self.tuplemaker(words,length))
			if not worddict:
				raise ValueError('Argument "words" must contain at least "length" words')

			#Only a word that has a continuation can start the chain. Picking from
			#"words" (not from the keys) keeps frequent words proportionally likely.
			starts = [word for word in words if word in worddict]
			#We want to start with a capital lettered word when there is one.
			#word[:1] instead of word[0] so an empty string does not crash.
			capitalised = [word for word in starts if word[:1].isupper()]
			starts = capitalised or starts

			chain = []
			while len(chain) < size:
				candidates = []
				if chain:
					#The key is the word "length - 1" places from the end; the words
					#after it are the context that a stored entry has to match.
					pivot = len(chain) - (length - 1)
					context = tuple(chain[pivot + 1:])
					candidates = [tup
						for tup in worddict.get(chain[pivot], ())
						if tup[:-1] == context]

				if candidates:
					chain.append(random.choice(candidates)[-1])
				else:
					#Either the chain is still empty or it ran into a dead end (the
					#tail of the training text with no recorded continuation).
					#Kickstart a fresh run instead of crashing.
					seed = random.choice(starts)
					chain.append(seed)
					chain.extend(random.choice(worddict[seed]))

			#A kickstart adds "length" words at once, so trim any overshoot
			return ' '.join(chain[:size])


	#Example usage: MarkovChain().generate(words,3,200)