Last active
August 29, 2015 14:02
-
-
Save JulianBirch/96bc114654d72e89b8d1 to your computer and use it in GitHub Desktop.
Revised version of Kris Jenkin's Markov Generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# OPTIONS_GHC -Wall #-} | |
{-# LANGUAGE RankNTypes #-} | |
module Main where | |
-- Lots more imports | |
import Control.Applicative | |
import Data.List | |
import qualified Data.Map as Map | |
import System.Random | |
-- No longer have an End here. This is because I'm using the absence of an entry | |
-- in the graph to indicate termination. More further down | |
data Node a = Node a | Start | End deriving (Eq, Ord, Show) | |
-- Redefined definition of Graph so that it doesn't depend on Node | |
-- This, in turn, allows subsequent functions to be node-independent | |
newtype Graph a = Graph (Map.Map a [a]) | |
-- oneOf moved close to usage | |
-- addHeadAndTail collapsed with lineToSentence | |
-- addPair renamed since the old name made no sense | |
-- Replaced the pairs logic with something that just destructures the head of the list | |
-- Then swapped the parameters to make it pointfree | |
addSuccessor :: Ord a => Graph a -> [a] -> Graph a | |
-- Use insertWith instead. My feeling is that insertWith isn't as nice as | |
-- Clojure's update-in, | |
-- in that it requires the inserted value to be the same type as the storage representation | |
addSuccessor (Graph t) (x:y:_) = Graph $ Map.insertWith (++) x [y] t | |
addSuccessor t _ = t | |
-- Used infix | |
-- flipped the arguments around to facilitate currying | |
oneOf :: RandomGen g => g -> [a] -> (a,g) | |
oneOf g xs = (xs !! n, g') | |
where (n,g') = randomR (0, length xs - 1) g | |
-- readChainStep modified to work with unfoldr | |
-- unfoldr is more useful than clojure's iterate function because it's got | |
-- termination built in. | |
-- Also modified to use the fact that Maybe is a Functor and so you can just | |
-- fmap everything | |
-- Since unfoldr expects a Maybe and lookup generates one, we lose a lot | |
-- of type rewriting book-keeping | |
-- Finally, removed the logic from this function that destructured Node, making it | |
-- Node independent | |
nextStep :: (Node a, g) -> Maybe (a, (Node a, g)) | |
nextStep p@(Node a, _) = Just (a, p) | |
nextStep _ = Nothing | |
-- Fixed the bug. Also discoved the first time I needed a monad | |
readChainStep :: (Ord a, RandomGen g) => Graph (Node a) -> ((Node a),g) -> Maybe (a,(Node a, g)) | |
readChainStep (Graph t) (p,g) = nextStep =<< oneOf g <$> Map.lookup p t | |
-- readChain folded into main | |
-- No longer adds end, | |
lineToSentence :: [a] -> [Node a] | |
lineToSentence xs = [Start] ++ fmap Node xs ++ [End] | |
-- Now uses unfoldr instead of readChain | |
main :: IO () | |
main = do | |
text <- readFile "corpus.txt" | |
g <- getStdGen | |
let productions = tails =<< lineToSentence <$> words <$> lines text | |
-- Instead of passing the sentences in and tearing them down, we concatMap | |
-- to tails to just get a list of productions. Then we only need one word | |
-- This basically makes the whole thing | |
-- file -> unfold to productions -> fold to graph -> unfold to output -> print output | |
tree = foldl addSuccessor (Graph Map.empty) productions | |
newChain = unfoldr (readChainStep tree) (Start,g) | |
print $ unwords newChain |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You probably meant something like
on line 65.
And it probably makes sense to include a data constructor like
Stop
marking the end of the sentence to allow the Markov chain to stop when picking a word that can appear both in the middle of a sentence as well as in the end.PS. Btw, found out about Pointfree when fixing it. Here's a nice setup for GHCi: