Created
March 29, 2021 02:05
-
-
Save Ceasar/61138d16f3daeb35ce2c6cb8af7cdd36 to your computer and use it in GitHub Desktop.
Markov chain generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import random | |
def gen_ngrams(letters, n=2):
    """Yield every n-gram of *letters*, padded with ``None`` at both ends.

    A sliding window of size ``n`` starts out filled with ``None``. Each item
    of *letters* is pushed in from the right (the oldest item falls off the
    left) and the window is yielded after every push. Once *letters* is
    exhausted a single ``None`` is pushed and the window is yielded one last
    time.

    Args:
        letters: Iterable of items, e.g. the characters of a word.
        n: Size of the sliding window.

    Yields:
        Tuples of length ``n``.

    >>> list(gen_ngrams("ab"))
    [(None, 'a'), ('a', 'b'), ('b', None)]
    """
    # A bounded deque discards its leftmost item on append once it is full,
    # which is exactly the sliding-window behaviour needed here.
    window = collections.deque([None] * n, maxlen=n)
    for letter in letters:
        window.append(letter)
        yield tuple(window)
    # The trailing None acts as the end-of-sequence marker.
    window.append(None)
    yield tuple(window)
def get_ngram_weights(words, n=2):
    """Count how often each item follows each (n-1)-item context.

    Every word is expanded into n-grams with ``gen_ngrams``; the first
    ``n - 1`` items of an n-gram form the context and the last item is the
    item that followed it.

    Args:
        words: Iterable of words, each itself an iterable of items.
        n: n-gram size handed to ``gen_ngrams``.

    Returns:
        A ``collections.defaultdict`` mapping each context tuple to a
        ``collections.Counter`` of ``following item -> occurrence count``.
    """
    # Counter is already a zero-argument factory; no lambda wrapper needed.
    ngram_weights = collections.defaultdict(collections.Counter)
    for word in words:
        for ngram in gen_ngrams(word, n=n):
            context = ngram[:-1]
            following = ngram[-1]
            ngram_weights[context][following] += 1
    return ngram_weights
def choose_weighted_key(dict_items):
    """Pick one key at random with probability proportional to its weight.

    Args:
        dict_items: Iterable of ``(key, weight)`` pairs, such as the
            ``items()`` view of a ``Counter``.

    Returns:
        One of the keys from *dict_items*.

    Raises:
        ValueError: If *dict_items* is empty.
    """
    # Materialise first so an empty input can be reported clearly instead of
    # surfacing as a tuple-unpacking error from ``zip(*...)``.
    pairs = list(dict_items)
    if not pairs:
        raise ValueError("cannot choose from an empty collection of weighted keys")
    keys, weights = zip(*pairs)
    # random.choices returns a list of length k; take its single element.
    return random.choices(keys, weights=weights, k=1)[0]
def make_name(lines, n=2):
    """Generate one random word from an n-gram Markov model of *lines*.

    The model is built with ``get_ngram_weights``. Generation starts from the
    all-``None`` context, repeatedly draws the next item with
    ``choose_weighted_key`` according to the counts observed for the current
    context, and stops when the end marker ``None`` is drawn.

    Args:
        lines: Iterable of training words (strings).
        n: n-gram size; each item is chosen from the ``n - 1`` items before it.

    Returns:
        The generated word as a string.

    Raises:
        ValueError: If ``n`` is less than 1, or *lines* contains no words.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")
    weights = get_ngram_weights(lines, n=n)
    start = (None,) * (n - 1)
    # Membership test rather than indexing: the table may be a defaultdict,
    # and indexing would silently create an empty entry.
    if start not in weights:
        raise ValueError("cannot build a name from an empty list of training words")

    # Holds the last n-1 generated items; older ones fall off the left.
    context = collections.deque(start, maxlen=n - 1)
    word = []
    letter = choose_weighted_key(weights[start].items())
    while letter is not None:
        word.append(letter)
        context.append(letter)
        followers = weights.get(tuple(context))
        if not followers:
            # Nothing was ever observed after this context: end the word.
            break
        letter = choose_weighted_key(followers.items())
    return ''.join(word)
def gen_names(filename, n=2):
    """Yield an endless stream of random names modelled on a word-list file.

    The file is read once, one training word per line, with surrounding
    whitespace stripped. Blank lines are skipped so that they are not learnt
    as empty words (which would make the generator emit empty names).

    Args:
        filename: Path of the text file holding one training word per line.
        n: n-gram size passed through to ``make_name``.

    Yields:
        Generated names, without end.

    Raises:
        ValueError: If the file contains no non-blank lines. Because this is
            a generator, the error (like any error from opening the file)
            surfaces on the first ``next()`` call.
    """
    with open(filename) as fp:
        lines = [word for word in (line.strip() for line in fp) if word]
    if not lines:
        raise ValueError(f"no training words found in {filename!r}")
    while True:
        yield make_name(lines, n=n)
def main(filename, n=2):
    """Print generated names to standard output, one per line, forever.

    Args:
        filename: Path of the training word list, passed to ``gen_names``.
        n: n-gram size, passed to ``gen_names``.
    """
    for name in gen_names(filename, n):
        # Flush after every name so each one is written out immediately
        # rather than waiting in the output buffer.
        print(name, flush=True)
if __name__ == '__main__':
    # Command line: <script> FILENAME [N]
    import sys

    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FILENAME [N]")
    # N is optional: when it is omitted, main()'s own default applies
    # instead of the script crashing with an IndexError.
    main(sys.argv[1], *[int(arg) for arg in sys.argv[2:3]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment