Skip to content

Instantly share code, notes, and snippets.

@c-u-l8er
Created June 26, 2025 20:01
Show Gist options
  • Save c-u-l8er/2c66069391ff88e0a87ce7b9fa0460db to your computer and use it in GitHub Desktop.
Save c-u-l8er/2c66069391ff88e0a87ce7b9fa0460db to your computer and use it in GitHub Desktop.
-- https://claude.ai/chat/93a32ec2-3558-4662-aa9f-8241583007cf
{-# LANGUAGE OverloadedStrings #-}
module VMCompiler where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Word
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List (elemIndex)
import Control.Monad.State
import Control.Monad.Except
import Text.Parsec hiding (State)
import Text.Parsec.Text
import Text.Parsec.Language
import qualified Text.Parsec.Token as Token
-- ===== ABSTRACT SYNTAX TREE =====
data Expr
= EInt Integer
| EFloat Double
| EAtom Text
| ETuple [Expr]
| EList [Expr]
| EVar Text
| ECall Expr [Expr] -- Function call
| EBinOp BinOp Expr Expr
| EUnaryOp UnaryOp Expr
| EMatch Expr Expr -- Pattern matching
| ECase Expr [CaseClause]
| EReceive [ReceiveClause] (Maybe (Expr, Expr)) -- patterns, optional timeout
| ESend Expr Expr -- destination ! message
| ESpawn Expr Expr [Expr] -- spawn(Module, Function, Args)
| ELet Text Expr Expr -- let Var = Expr in Expr
| ESeq [Expr] -- Sequence of expressions
deriving (Show, Eq)
data BinOp
= Add | Sub | Mul | DivInt | DivFloat | Rem
| BAnd | BOr | BXor | BSL | BSR
| Eq | Ne | Lt | Le | Gt | Ge
deriving (Show, Eq)
data UnaryOp
= Neg | BNot
deriving (Show, Eq)
data CaseClause = CaseClause Expr Expr -- pattern -> body
deriving (Show, Eq)
data ReceiveClause = ReceiveClause Expr Expr -- pattern -> body
deriving (Show, Eq)
data Function = Function
{ funcName :: Text
, funcArity :: Int
, funcParams :: [Text]
, funcBody :: Expr
} deriving (Show, Eq)
data Module = Module
{ moduleName :: Text
, moduleExports :: [Text]
, moduleFunctions :: [Function]
} deriving (Show, Eq)
-- ===== VM BYTECODE TYPES =====
data OpCode
= PushLiteral | PushVar | Pop | Dup | Swap
| Add | Sub | Mul | DivInt | DivFloat | Rem
| BAnd | BOr | BXor | BNot | BSL | BSR
| Eq | Ne | Lt | Le | Gt | Ge
| IsAtom | IsNumber | IsTuple | IsList | IsBinary | IsFunction | IsPid
| Jump | JumpIfTrue | JumpIfFalse
| Call | CallExt | Ret | TryCase | CatchEnd
| Spawn | SpawnLink | SpawnMonitor
| Send | Receive | ReceiveTimeout
| Link | Unlink | Monitor | Demonitor | Exit
| MakeTuple | GetTupleElement | SetTupleElement
| MakeList | GetListHead | GetListTail
| MakeMap | GetMapValue | PutMapValue
| MakeBinary | BinaryPart
| Throw | Error | Apply | ApplyLast | GC
| NifStart | NifReturn | Halt | Nop
deriving (Show, Eq, Enum)
data Instruction = Instruction
{ opcode :: OpCode
, args :: [Word32]
} deriving (Show, Eq)
data Term
= TInt Integer
| TFloat Double
| TAtom Text
| TNil
deriving (Show, Eq)
data CompiledModule = CompiledModule
{ cmName :: Text
, cmExports :: [Text]
, cmCode :: [Instruction]
, cmLiterals :: [Term]
, cmAtoms :: [Text]
} deriving (Show, Eq)
-- ===== COMPILER STATE =====
data CompilerState = CompilerState
{ csLiterals :: [Term]
, csAtoms :: [Text]
, csVars :: Map Text Int -- Variable to register mapping
, csNextReg :: Int
, csLabels :: Map Text Int
, csCode :: [Instruction]
, csNextLabel :: Int
} deriving (Show)
type CompilerM = StateT CompilerState (Except Text)
initCompilerState :: CompilerState
initCompilerState = CompilerState [] [] Map.empty 0 Map.empty [] 0
-- ===== LEXER AND PARSER =====
languageDef :: LanguageDef ()
languageDef = emptyDef
{ Token.commentStart = "/*"
, Token.commentEnd = "*/"
, Token.commentLine = "%"
, Token.identStart = letter <|> char '_'
, Token.identLetter = alphaNum <|> char '_'
, Token.reservedNames = ["let", "in", "case", "of", "receive", "after",
"spawn", "true", "false", "nil"]
, Token.reservedOpNames = ["+", "-", "*", "div", "/", "rem", "band", "bor",
"bxor", "bnot", "bsl", "bsr", "==", "/=", "<",
"=<", ">", ">=", "!", "="]
, Token.caseSensitive = True
}
lexer :: Token.TokenParser ()
lexer = Token.makeTokenParser languageDef
identifier :: Parser Text
identifier = T.pack <$> Token.identifier lexer
reserved :: String -> Parser ()
reserved = Token.reserved lexer
reservedOp :: String -> Parser ()
reservedOp = Token.reservedOp lexer
parens :: Parser a -> Parser a
parens = Token.parens lexer
braces :: Parser a -> Parser a
braces = Token.braces lexer
brackets :: Parser a -> Parser a
brackets = Token.brackets lexer
comma :: Parser String
comma = Token.comma lexer
semi :: Parser String
semi = Token.semi lexer
integer :: Parser Integer
integer = Token.integer lexer
float :: Parser Double
float = Token.float lexer
stringLiteral :: Parser String
stringLiteral = Token.stringLiteral lexer
whiteSpace :: Parser ()
whiteSpace = Token.whiteSpace lexer
-- Expression parser
parseExpr :: Parser Expr
parseExpr = parseSeq
parseSeq :: Parser Expr
parseSeq = do
exprs <- parseAssign `sepBy1` comma
case exprs of
[e] -> return e
es -> return $ ESeq es
parseAssign :: Parser Expr
parseAssign = parseOr
parseOr :: Parser Expr
parseOr = chainl1 parseAnd (reservedOp ";" >> return (EBinOp BOr))
parseAnd :: Parser Expr
parseAnd = chainl1 parseComparison (reservedOp "," >> return (EBinOp BAnd))
parseComparison :: Parser Expr
parseComparison = chainl1 parseArithmetic parseCompOp
where
parseCompOp = choice
[ reservedOp "==" >> return (EBinOp Eq)
, reservedOp "/=" >> return (EBinOp Ne)
, reservedOp "=<" >> return (EBinOp Le)
, reservedOp ">=" >> return (EBinOp Ge)
, reservedOp "<" >> return (EBinOp Lt)
, reservedOp ">" >> return (EBinOp Gt)
]
parseArithmetic :: Parser Expr
parseArithmetic = chainl1 parseTerm parseArithOp
where
parseArithOp = choice
[ reservedOp "+" >> return (EBinOp Add)
, reservedOp "-" >> return (EBinOp Sub)
, reservedOp "*" >> return (EBinOp Mul)
, reservedOp "div" >> return (EBinOp DivInt)
, reservedOp "/" >> return (EBinOp DivFloat)
, reservedOp "rem" >> return (EBinOp Rem)
, reservedOp "band" >> return (EBinOp BAnd)
, reservedOp "bor" >> return (EBinOp BOr)
, reservedOp "bxor" >> return (EBinOp BXor)
, reservedOp "bsl" >> return (EBinOp BSL)
, reservedOp "bsr" >> return (EBinOp BSR)
]
parseTerm :: Parser Expr
parseTerm = parseUnary
parseUnary :: Parser Expr
parseUnary = choice
[ reservedOp "-" >> EUnaryOp Neg <$> parseUnary
, reservedOp "bnot" >> EUnaryOp BNot <$> parseUnary
, parseFactor
]
parseFactor :: Parser Expr
parseFactor = choice
[ parseNumber
, parseAtom
, parseVariable
, parseTuple
, parseList
, parseCall
, parseSend
, parseSpawn
, parseReceive
, parseCase
, parseLet
, parens parseExpr
]
parseNumber :: Parser Expr
parseNumber = try (EFloat <$> float) <|> (EInt <$> integer)
parseAtom :: Parser Expr
parseAtom = EAtom <$> parseAtomLiteral
where
parseAtomLiteral = choice
[ T.pack <$> stringLiteral
, reserved "true" >> return "true"
, reserved "false" >> return "false"
, reserved "nil" >> return "nil"
]
parseVariable :: Parser Expr
parseVariable = EVar <$> identifier
parseTuple :: Parser Expr
parseTuple = braces $ ETuple <$> parseExpr `sepBy` comma
parseList :: Parser Expr
parseList = brackets $ EList <$> parseExpr `sepBy` comma
parseCall :: Parser Expr
parseCall = do
func <- identifier
args <- parens (parseExpr `sepBy` comma)
return $ ECall (EVar func) args
parseSend :: Parser Expr
parseSend = do
dest <- parseFactor
reservedOp "!"
msg <- parseExpr
return $ ESend dest msg
parseSpawn :: Parser Expr
parseSpawn = do
reserved "spawn"
parens $ do
mod_expr <- parseExpr
comma
func_expr <- parseExpr
comma
args_expr <- parseExpr
return $ ESpawn mod_expr func_expr [args_expr]
parseReceive :: Parser Expr
parseReceive = do
reserved "receive"
clauses <- many parseReceiveClause
timeout <- optionMaybe parseAfter
reserved "end"
return $ EReceive clauses timeout
where
parseReceiveClause = do
pattern <- parseExpr
reservedOp "->"
body <- parseExpr
semi
return $ ReceiveClause pattern body
parseAfter = do
reserved "after"
timeout_expr <- parseExpr
reservedOp "->"
body <- parseExpr
return (timeout_expr, body)
parseCase :: Parser Expr
parseCase = do
reserved "case"
expr <- parseExpr
reserved "of"
clauses <- many parseCaseClause
reserved "end"
return $ ECase expr clauses
where
parseCaseClause = do
pattern <- parseExpr
reservedOp "->"
body <- parseExpr
semi
return $ CaseClause pattern body
parseLet :: Parser Expr
parseLet = do
reserved "let"
var <- identifier
reservedOp "="
value <- parseExpr
reserved "in"
body <- parseExpr
return $ ELet var value body
-- Function and module parsers
parseFunction :: Parser Function
parseFunction = do
name <- identifier
params <- parens (identifier `sepBy` comma)
reservedOp "->"
body <- parseExpr
return $ Function name (length params) params body
parseModule :: Parser Module
parseModule = do
reserved "module"
name <- identifier
whiteSpace
exports <- parseExports
functions <- many parseFunction
return $ Module name exports functions
where
parseExports = do
reserved "export"
brackets (identifier `sepBy` comma)
-- ===== COMPILER IMPLEMENTATION =====
addLiteral :: Term -> CompilerM Int
addLiteral term = do
st <- get
case elemIndex term (csLiterals st) of
Just idx -> return idx
Nothing -> do
let idx = length (csLiterals st)
put st { csLiterals = csLiterals st ++ [term] }
return idx
addAtom :: Text -> CompilerM Int
addAtom atom = do
st <- get
case elemIndex atom (csAtoms st) of
Just idx -> return idx
Nothing -> do
let idx = length (csAtoms st)
put st { csAtoms = csAtoms st ++ [atom] }
return idx
allocReg :: Text -> CompilerM Int
allocReg var = do
st <- get
case Map.lookup var (csVars st) of
Just reg -> return reg
Nothing -> do
let reg = csNextReg st
put st { csVars = Map.insert var reg (csVars st)
, csNextReg = reg + 1 }
return reg
emit :: OpCode -> [Word32] -> CompilerM ()
emit op args = do
st <- get
let instr = Instruction op args
put st { csCode = csCode st ++ [instr] }
compileExpr :: Expr -> CompilerM ()
compileExpr expr = case expr of
EInt n -> do
idx <- addLiteral (TInt n)
emit PushLiteral [fromIntegral idx]
EFloat f -> do
idx <- addLiteral (TFloat f)
emit PushLiteral [fromIntegral idx]
EAtom a -> do
idx <- addLiteral (TAtom a)
emit PushLiteral [fromIntegral idx]
EVar var -> do
reg <- allocReg var
emit PushVar [fromIntegral reg]
ETuple exprs -> do
mapM_ compileExpr exprs
emit MakeTuple [fromIntegral $ length exprs]
EList exprs -> do
mapM_ compileExpr exprs
emit MakeList [fromIntegral $ length exprs]
EBinOp op lhs rhs -> do
compileExpr lhs
compileExpr rhs
emit (binOpToOpCode op) []
EUnaryOp op expr -> do
compileExpr expr
case op of
Neg -> do
-- Push 0 and subtract
zeroIdx <- addLiteral (TInt 0)
emit PushLiteral [fromIntegral zeroIdx]
emit Swap []
emit Sub []
BNot -> emit BNot []
ECall func args -> do
mapM_ compileExpr args
compileExpr func
emit Call [fromIntegral $ length args]
ESend dest msg -> do
compileExpr dest
compileExpr msg
emit Send []
ESpawn modExpr funcExpr args -> do
compileExpr modExpr
compileExpr funcExpr
mapM_ compileExpr args
emit Spawn []
ELet var value body -> do
compileExpr value
reg <- allocReg var
emit Pop [] -- Store in register (simplified)
compileExpr body
ESeq exprs -> mapM_ compileExpr exprs
EReceive clauses timeout -> do
-- Simplified receive compilation
case timeout of
Nothing -> emit Receive []
Just (timeoutExpr, _) -> do
compileExpr timeoutExpr
emit ReceiveTimeout []
ECase expr clauses -> do
-- Simplified case compilation
compileExpr expr
-- Would need more sophisticated pattern matching
emit Nop []
EMatch _ _ ->
throwError "Pattern matching not yet implemented"
binOpToOpCode :: BinOp -> OpCode
binOpToOpCode op = case op of
Add -> Add
Sub -> Sub
Mul -> Mul
DivInt -> DivInt
DivFloat -> DivFloat
Rem -> Rem
BAnd -> BAnd
BOr -> BOr
BXor -> BXor
BSL -> BSL
BSR -> BSR
Eq -> Eq
Ne -> Ne
Lt -> Lt
Le -> Le
Gt -> Gt
Ge -> Ge
compileFunction :: Function -> CompilerM [Instruction]
compileFunction func = do
-- Reset state for new function
modify $ \st -> st { csVars = Map.empty, csNextReg = 0, csCode = [] }
-- Allocate registers for parameters
mapM_ allocReg (funcParams func)
-- Compile function body
compileExpr (funcBody func)
-- Add return instruction
emit Ret []
-- Return generated code
gets csCode
compileModule :: Module -> CompilerM CompiledModule
compileModule mod = do
-- Reset compiler state
put initCompilerState
-- Add module name to atoms
_ <- addAtom (moduleName mod)
-- Compile all functions
allCode <- concat <$> mapM compileFunction (moduleFunctions mod)
-- Get final state
st <- get
return $ CompiledModule
{ cmName = moduleName mod
, cmExports = moduleExports mod
, cmCode = allCode
, cmLiterals = csLiterals st
, cmAtoms = csAtoms st
}
-- ===== CODE GENERATION =====
generateZigInstruction :: Instruction -> Text
generateZigInstruction (Instruction op args) =
case args of
[] -> T.concat ["Instruction.make(.", T.pack (show op), ")"]
[a] -> T.concat ["Instruction.make1(.", T.pack (show op), ", ", T.pack (show a), ")"]
[a, b] -> T.concat ["Instruction.make2(.", T.pack (show op), ", ", T.pack (show a), ", ", T.pack (show b), ")"]
[a, b, c] -> T.concat ["Instruction.make3(.", T.pack (show op), ", ", T.pack (show a), ", ", T.pack (show b), ", ", T.pack (show c), ")"]
_ -> T.concat ["Instruction.make(.", T.pack (show op), ") // TODO: handle more args"]
generateZigTerm :: Term -> Text
generateZigTerm term = case term of
TInt n -> T.concat ["Term.make_integer(", T.pack (show n), ")"]
TFloat f -> T.concat ["Term{ .tag = .float, .data = .{ .float = ", T.pack (show f), " } }"]
TAtom a -> T.concat ["Term.make_atom(try vm.atom_table.intern(\"", a, "\"))"]
TNil -> "Term.make_nil()"
generateZigCode :: CompiledModule -> Text
generateZigCode cm = T.unlines
[ "// Generated code for module: " <> cmName cm
, ""
, "const " <> cmName cm <> "_code = [_]Instruction{"
, T.unlines (map ((" " <>) . (<> ",") . generateZigInstruction) (cmCode cm))
, "};"
, ""
, "const " <> cmName cm <> "_literals = [_]Term{"
, T.unlines (map ((" " <>) . (<> ",") . generateZigTerm) (cmLiterals cm))
, "};"
]
-- ===== MAIN COMPILER INTERFACE =====
compile :: Text -> Either Text CompiledModule
compile source = do
-- Parse the source code
mod <- case parse parseModule "source" source of
Left err -> Left (T.pack $ show err)
Right mod -> Right mod
-- Compile the module
case runExcept (evalStateT (compileModule mod) initCompilerState) of
Left err -> Left err
Right compiled -> Right compiled
compileToZig :: Text -> Either Text Text
compileToZig source = do
compiled <- compile source
return $ generateZigCode compiled
-- ===== EXAMPLE USAGE =====
exampleSource :: Text
exampleSource = T.unlines
[ "module example"
, "export [factorial, fibonacci]"
, ""
, "factorial(0) -> 1"
, "factorial(N) -> N * factorial(N - 1)"
, ""
, "fibonacci(0) -> 0"
, "fibonacci(1) -> 1"
, "fibonacci(N) -> fibonacci(N - 1) + fibonacci(N - 2)"
, ""
, "main() ->"
, " let Result = factorial(5) in"
, " spawn(io, format, [\"Factorial: ~p~n\", [Result]])"
]
-- For testing
testCompiler :: IO ()
testCompiler = do
case compileToZig exampleSource of
Left err -> putStrLn $ "Compilation error: " ++ T.unpack err
Right zigCode -> putStrLn $ T.unpack zigCode
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment