Created
June 26, 2025 20:01
-
-
Save c-u-l8er/2c66069391ff88e0a87ce7b9fa0460db to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- https://claude.ai/chat/93a32ec2-3558-4662-aa9f-8241583007cf | |
{-# LANGUAGE OverloadedStrings #-} | |
module VMCompiler where | |
import Data.Text (Text) | |
import qualified Data.Text as T | |
import Data.Word | |
import Data.Map (Map) | |
import qualified Data.Map as Map | |
import Data.List (elemIndex) | |
import Control.Monad.State | |
import Control.Monad.Except | |
import Text.Parsec hiding (State) | |
import Text.Parsec.Text | |
import Text.Parsec.Language | |
import qualified Text.Parsec.Token as Token | |
-- ===== ABSTRACT SYNTAX TREE ===== | |
data Expr | |
= EInt Integer | |
| EFloat Double | |
| EAtom Text | |
| ETuple [Expr] | |
| EList [Expr] | |
| EVar Text | |
| ECall Expr [Expr] -- Function call | |
| EBinOp BinOp Expr Expr | |
| EUnaryOp UnaryOp Expr | |
| EMatch Expr Expr -- Pattern matching | |
| ECase Expr [CaseClause] | |
| EReceive [ReceiveClause] (Maybe (Expr, Expr)) -- patterns, optional timeout | |
| ESend Expr Expr -- destination ! message | |
| ESpawn Expr Expr [Expr] -- spawn(Module, Function, Args) | |
| ELet Text Expr Expr -- let Var = Expr in Expr | |
| ESeq [Expr] -- Sequence of expressions | |
deriving (Show, Eq) | |
data BinOp | |
= Add | Sub | Mul | DivInt | DivFloat | Rem | |
| BAnd | BOr | BXor | BSL | BSR | |
| Eq | Ne | Lt | Le | Gt | Ge | |
deriving (Show, Eq) | |
data UnaryOp | |
= Neg | BNot | |
deriving (Show, Eq) | |
data CaseClause = CaseClause Expr Expr -- pattern -> body | |
deriving (Show, Eq) | |
data ReceiveClause = ReceiveClause Expr Expr -- pattern -> body | |
deriving (Show, Eq) | |
data Function = Function | |
{ funcName :: Text | |
, funcArity :: Int | |
, funcParams :: [Text] | |
, funcBody :: Expr | |
} deriving (Show, Eq) | |
data Module = Module | |
{ moduleName :: Text | |
, moduleExports :: [Text] | |
, moduleFunctions :: [Function] | |
} deriving (Show, Eq) | |
-- ===== VM BYTECODE TYPES ===== | |
data OpCode | |
= PushLiteral | PushVar | Pop | Dup | Swap | |
| Add | Sub | Mul | DivInt | DivFloat | Rem | |
| BAnd | BOr | BXor | BNot | BSL | BSR | |
| Eq | Ne | Lt | Le | Gt | Ge | |
| IsAtom | IsNumber | IsTuple | IsList | IsBinary | IsFunction | IsPid | |
| Jump | JumpIfTrue | JumpIfFalse | |
| Call | CallExt | Ret | TryCase | CatchEnd | |
| Spawn | SpawnLink | SpawnMonitor | |
| Send | Receive | ReceiveTimeout | |
| Link | Unlink | Monitor | Demonitor | Exit | |
| MakeTuple | GetTupleElement | SetTupleElement | |
| MakeList | GetListHead | GetListTail | |
| MakeMap | GetMapValue | PutMapValue | |
| MakeBinary | BinaryPart | |
| Throw | Error | Apply | ApplyLast | GC | |
| NifStart | NifReturn | Halt | Nop | |
deriving (Show, Eq, Enum) | |
data Instruction = Instruction | |
{ opcode :: OpCode | |
, args :: [Word32] | |
} deriving (Show, Eq) | |
data Term | |
= TInt Integer | |
| TFloat Double | |
| TAtom Text | |
| TNil | |
deriving (Show, Eq) | |
data CompiledModule = CompiledModule | |
{ cmName :: Text | |
, cmExports :: [Text] | |
, cmCode :: [Instruction] | |
, cmLiterals :: [Term] | |
, cmAtoms :: [Text] | |
} deriving (Show, Eq) | |
-- ===== COMPILER STATE ===== | |
data CompilerState = CompilerState | |
{ csLiterals :: [Term] | |
, csAtoms :: [Text] | |
, csVars :: Map Text Int -- Variable to register mapping | |
, csNextReg :: Int | |
, csLabels :: Map Text Int | |
, csCode :: [Instruction] | |
, csNextLabel :: Int | |
} deriving (Show) | |
type CompilerM = StateT CompilerState (Except Text) | |
initCompilerState :: CompilerState | |
initCompilerState = CompilerState [] [] Map.empty 0 Map.empty [] 0 | |
-- ===== LEXER AND PARSER ===== | |
languageDef :: LanguageDef () | |
languageDef = emptyDef | |
{ Token.commentStart = "/*" | |
, Token.commentEnd = "*/" | |
, Token.commentLine = "%" | |
, Token.identStart = letter <|> char '_' | |
, Token.identLetter = alphaNum <|> char '_' | |
, Token.reservedNames = ["let", "in", "case", "of", "receive", "after", | |
"spawn", "true", "false", "nil"] | |
, Token.reservedOpNames = ["+", "-", "*", "div", "/", "rem", "band", "bor", | |
"bxor", "bnot", "bsl", "bsr", "==", "/=", "<", | |
"=<", ">", ">=", "!", "="] | |
, Token.caseSensitive = True | |
} | |
lexer :: Token.TokenParser () | |
lexer = Token.makeTokenParser languageDef | |
identifier :: Parser Text | |
identifier = T.pack <$> Token.identifier lexer | |
reserved :: String -> Parser () | |
reserved = Token.reserved lexer | |
reservedOp :: String -> Parser () | |
reservedOp = Token.reservedOp lexer | |
parens :: Parser a -> Parser a | |
parens = Token.parens lexer | |
braces :: Parser a -> Parser a | |
braces = Token.braces lexer | |
brackets :: Parser a -> Parser a | |
brackets = Token.brackets lexer | |
comma :: Parser String | |
comma = Token.comma lexer | |
semi :: Parser String | |
semi = Token.semi lexer | |
integer :: Parser Integer | |
integer = Token.integer lexer | |
float :: Parser Double | |
float = Token.float lexer | |
stringLiteral :: Parser String | |
stringLiteral = Token.stringLiteral lexer | |
whiteSpace :: Parser () | |
whiteSpace = Token.whiteSpace lexer | |
-- Expression parser | |
parseExpr :: Parser Expr | |
parseExpr = parseSeq | |
parseSeq :: Parser Expr | |
parseSeq = do | |
exprs <- parseAssign `sepBy1` comma | |
case exprs of | |
[e] -> return e | |
es -> return $ ESeq es | |
parseAssign :: Parser Expr | |
parseAssign = parseOr | |
parseOr :: Parser Expr | |
parseOr = chainl1 parseAnd (reservedOp ";" >> return (EBinOp BOr)) | |
parseAnd :: Parser Expr | |
parseAnd = chainl1 parseComparison (reservedOp "," >> return (EBinOp BAnd)) | |
parseComparison :: Parser Expr | |
parseComparison = chainl1 parseArithmetic parseCompOp | |
where | |
parseCompOp = choice | |
[ reservedOp "==" >> return (EBinOp Eq) | |
, reservedOp "/=" >> return (EBinOp Ne) | |
, reservedOp "=<" >> return (EBinOp Le) | |
, reservedOp ">=" >> return (EBinOp Ge) | |
, reservedOp "<" >> return (EBinOp Lt) | |
, reservedOp ">" >> return (EBinOp Gt) | |
] | |
parseArithmetic :: Parser Expr | |
parseArithmetic = chainl1 parseTerm parseArithOp | |
where | |
parseArithOp = choice | |
[ reservedOp "+" >> return (EBinOp Add) | |
, reservedOp "-" >> return (EBinOp Sub) | |
, reservedOp "*" >> return (EBinOp Mul) | |
, reservedOp "div" >> return (EBinOp DivInt) | |
, reservedOp "/" >> return (EBinOp DivFloat) | |
, reservedOp "rem" >> return (EBinOp Rem) | |
, reservedOp "band" >> return (EBinOp BAnd) | |
, reservedOp "bor" >> return (EBinOp BOr) | |
, reservedOp "bxor" >> return (EBinOp BXor) | |
, reservedOp "bsl" >> return (EBinOp BSL) | |
, reservedOp "bsr" >> return (EBinOp BSR) | |
] | |
parseTerm :: Parser Expr | |
parseTerm = parseUnary | |
parseUnary :: Parser Expr | |
parseUnary = choice | |
[ reservedOp "-" >> EUnaryOp Neg <$> parseUnary | |
, reservedOp "bnot" >> EUnaryOp BNot <$> parseUnary | |
, parseFactor | |
] | |
parseFactor :: Parser Expr | |
parseFactor = choice | |
[ parseNumber | |
, parseAtom | |
, parseVariable | |
, parseTuple | |
, parseList | |
, parseCall | |
, parseSend | |
, parseSpawn | |
, parseReceive | |
, parseCase | |
, parseLet | |
, parens parseExpr | |
] | |
parseNumber :: Parser Expr | |
parseNumber = try (EFloat <$> float) <|> (EInt <$> integer) | |
parseAtom :: Parser Expr | |
parseAtom = EAtom <$> parseAtomLiteral | |
where | |
parseAtomLiteral = choice | |
[ T.pack <$> stringLiteral | |
, reserved "true" >> return "true" | |
, reserved "false" >> return "false" | |
, reserved "nil" >> return "nil" | |
] | |
parseVariable :: Parser Expr | |
parseVariable = EVar <$> identifier | |
parseTuple :: Parser Expr | |
parseTuple = braces $ ETuple <$> parseExpr `sepBy` comma | |
parseList :: Parser Expr | |
parseList = brackets $ EList <$> parseExpr `sepBy` comma | |
parseCall :: Parser Expr | |
parseCall = do | |
func <- identifier | |
args <- parens (parseExpr `sepBy` comma) | |
return $ ECall (EVar func) args | |
parseSend :: Parser Expr | |
parseSend = do | |
dest <- parseFactor | |
reservedOp "!" | |
msg <- parseExpr | |
return $ ESend dest msg | |
parseSpawn :: Parser Expr | |
parseSpawn = do | |
reserved "spawn" | |
parens $ do | |
mod_expr <- parseExpr | |
comma | |
func_expr <- parseExpr | |
comma | |
args_expr <- parseExpr | |
return $ ESpawn mod_expr func_expr [args_expr] | |
parseReceive :: Parser Expr | |
parseReceive = do | |
reserved "receive" | |
clauses <- many parseReceiveClause | |
timeout <- optionMaybe parseAfter | |
reserved "end" | |
return $ EReceive clauses timeout | |
where | |
parseReceiveClause = do | |
pattern <- parseExpr | |
reservedOp "->" | |
body <- parseExpr | |
semi | |
return $ ReceiveClause pattern body | |
parseAfter = do | |
reserved "after" | |
timeout_expr <- parseExpr | |
reservedOp "->" | |
body <- parseExpr | |
return (timeout_expr, body) | |
parseCase :: Parser Expr | |
parseCase = do | |
reserved "case" | |
expr <- parseExpr | |
reserved "of" | |
clauses <- many parseCaseClause | |
reserved "end" | |
return $ ECase expr clauses | |
where | |
parseCaseClause = do | |
pattern <- parseExpr | |
reservedOp "->" | |
body <- parseExpr | |
semi | |
return $ CaseClause pattern body | |
parseLet :: Parser Expr | |
parseLet = do | |
reserved "let" | |
var <- identifier | |
reservedOp "=" | |
value <- parseExpr | |
reserved "in" | |
body <- parseExpr | |
return $ ELet var value body | |
-- Function and module parsers | |
parseFunction :: Parser Function | |
parseFunction = do | |
name <- identifier | |
params <- parens (identifier `sepBy` comma) | |
reservedOp "->" | |
body <- parseExpr | |
return $ Function name (length params) params body | |
parseModule :: Parser Module | |
parseModule = do | |
reserved "module" | |
name <- identifier | |
whiteSpace | |
exports <- parseExports | |
functions <- many parseFunction | |
return $ Module name exports functions | |
where | |
parseExports = do | |
reserved "export" | |
brackets (identifier `sepBy` comma) | |
-- ===== COMPILER IMPLEMENTATION ===== | |
addLiteral :: Term -> CompilerM Int | |
addLiteral term = do | |
st <- get | |
case elemIndex term (csLiterals st) of | |
Just idx -> return idx | |
Nothing -> do | |
let idx = length (csLiterals st) | |
put st { csLiterals = csLiterals st ++ [term] } | |
return idx | |
addAtom :: Text -> CompilerM Int | |
addAtom atom = do | |
st <- get | |
case elemIndex atom (csAtoms st) of | |
Just idx -> return idx | |
Nothing -> do | |
let idx = length (csAtoms st) | |
put st { csAtoms = csAtoms st ++ [atom] } | |
return idx | |
allocReg :: Text -> CompilerM Int | |
allocReg var = do | |
st <- get | |
case Map.lookup var (csVars st) of | |
Just reg -> return reg | |
Nothing -> do | |
let reg = csNextReg st | |
put st { csVars = Map.insert var reg (csVars st) | |
, csNextReg = reg + 1 } | |
return reg | |
emit :: OpCode -> [Word32] -> CompilerM () | |
emit op args = do | |
st <- get | |
let instr = Instruction op args | |
put st { csCode = csCode st ++ [instr] } | |
compileExpr :: Expr -> CompilerM () | |
compileExpr expr = case expr of | |
EInt n -> do | |
idx <- addLiteral (TInt n) | |
emit PushLiteral [fromIntegral idx] | |
EFloat f -> do | |
idx <- addLiteral (TFloat f) | |
emit PushLiteral [fromIntegral idx] | |
EAtom a -> do | |
idx <- addLiteral (TAtom a) | |
emit PushLiteral [fromIntegral idx] | |
EVar var -> do | |
reg <- allocReg var | |
emit PushVar [fromIntegral reg] | |
ETuple exprs -> do | |
mapM_ compileExpr exprs | |
emit MakeTuple [fromIntegral $ length exprs] | |
EList exprs -> do | |
mapM_ compileExpr exprs | |
emit MakeList [fromIntegral $ length exprs] | |
EBinOp op lhs rhs -> do | |
compileExpr lhs | |
compileExpr rhs | |
emit (binOpToOpCode op) [] | |
EUnaryOp op expr -> do | |
compileExpr expr | |
case op of | |
Neg -> do | |
-- Push 0 and subtract | |
zeroIdx <- addLiteral (TInt 0) | |
emit PushLiteral [fromIntegral zeroIdx] | |
emit Swap [] | |
emit Sub [] | |
BNot -> emit BNot [] | |
ECall func args -> do | |
mapM_ compileExpr args | |
compileExpr func | |
emit Call [fromIntegral $ length args] | |
ESend dest msg -> do | |
compileExpr dest | |
compileExpr msg | |
emit Send [] | |
ESpawn modExpr funcExpr args -> do | |
compileExpr modExpr | |
compileExpr funcExpr | |
mapM_ compileExpr args | |
emit Spawn [] | |
ELet var value body -> do | |
compileExpr value | |
reg <- allocReg var | |
emit Pop [] -- Store in register (simplified) | |
compileExpr body | |
ESeq exprs -> mapM_ compileExpr exprs | |
EReceive clauses timeout -> do | |
-- Simplified receive compilation | |
case timeout of | |
Nothing -> emit Receive [] | |
Just (timeoutExpr, _) -> do | |
compileExpr timeoutExpr | |
emit ReceiveTimeout [] | |
ECase expr clauses -> do | |
-- Simplified case compilation | |
compileExpr expr | |
-- Would need more sophisticated pattern matching | |
emit Nop [] | |
EMatch _ _ -> | |
throwError "Pattern matching not yet implemented" | |
binOpToOpCode :: BinOp -> OpCode | |
binOpToOpCode op = case op of | |
Add -> Add | |
Sub -> Sub | |
Mul -> Mul | |
DivInt -> DivInt | |
DivFloat -> DivFloat | |
Rem -> Rem | |
BAnd -> BAnd | |
BOr -> BOr | |
BXor -> BXor | |
BSL -> BSL | |
BSR -> BSR | |
Eq -> Eq | |
Ne -> Ne | |
Lt -> Lt | |
Le -> Le | |
Gt -> Gt | |
Ge -> Ge | |
compileFunction :: Function -> CompilerM [Instruction] | |
compileFunction func = do | |
-- Reset state for new function | |
modify $ \st -> st { csVars = Map.empty, csNextReg = 0, csCode = [] } | |
-- Allocate registers for parameters | |
mapM_ allocReg (funcParams func) | |
-- Compile function body | |
compileExpr (funcBody func) | |
-- Add return instruction | |
emit Ret [] | |
-- Return generated code | |
gets csCode | |
compileModule :: Module -> CompilerM CompiledModule | |
compileModule mod = do | |
-- Reset compiler state | |
put initCompilerState | |
-- Add module name to atoms | |
_ <- addAtom (moduleName mod) | |
-- Compile all functions | |
allCode <- concat <$> mapM compileFunction (moduleFunctions mod) | |
-- Get final state | |
st <- get | |
return $ CompiledModule | |
{ cmName = moduleName mod | |
, cmExports = moduleExports mod | |
, cmCode = allCode | |
, cmLiterals = csLiterals st | |
, cmAtoms = csAtoms st | |
} | |
-- ===== CODE GENERATION ===== | |
generateZigInstruction :: Instruction -> Text | |
generateZigInstruction (Instruction op args) = | |
case args of | |
[] -> T.concat ["Instruction.make(.", T.pack (show op), ")"] | |
[a] -> T.concat ["Instruction.make1(.", T.pack (show op), ", ", T.pack (show a), ")"] | |
[a, b] -> T.concat ["Instruction.make2(.", T.pack (show op), ", ", T.pack (show a), ", ", T.pack (show b), ")"] | |
[a, b, c] -> T.concat ["Instruction.make3(.", T.pack (show op), ", ", T.pack (show a), ", ", T.pack (show b), ", ", T.pack (show c), ")"] | |
_ -> T.concat ["Instruction.make(.", T.pack (show op), ") // TODO: handle more args"] | |
generateZigTerm :: Term -> Text | |
generateZigTerm term = case term of | |
TInt n -> T.concat ["Term.make_integer(", T.pack (show n), ")"] | |
TFloat f -> T.concat ["Term{ .tag = .float, .data = .{ .float = ", T.pack (show f), " } }"] | |
TAtom a -> T.concat ["Term.make_atom(try vm.atom_table.intern(\"", a, "\"))"] | |
TNil -> "Term.make_nil()" | |
generateZigCode :: CompiledModule -> Text | |
generateZigCode cm = T.unlines | |
[ "// Generated code for module: " <> cmName cm | |
, "" | |
, "const " <> cmName cm <> "_code = [_]Instruction{" | |
, T.unlines (map ((" " <>) . (<> ",") . generateZigInstruction) (cmCode cm)) | |
, "};" | |
, "" | |
, "const " <> cmName cm <> "_literals = [_]Term{" | |
, T.unlines (map ((" " <>) . (<> ",") . generateZigTerm) (cmLiterals cm)) | |
, "};" | |
] | |
-- ===== MAIN COMPILER INTERFACE ===== | |
compile :: Text -> Either Text CompiledModule | |
compile source = do | |
-- Parse the source code | |
mod <- case parse parseModule "source" source of | |
Left err -> Left (T.pack $ show err) | |
Right mod -> Right mod | |
-- Compile the module | |
case runExcept (evalStateT (compileModule mod) initCompilerState) of | |
Left err -> Left err | |
Right compiled -> Right compiled | |
compileToZig :: Text -> Either Text Text | |
compileToZig source = do | |
compiled <- compile source | |
return $ generateZigCode compiled | |
-- ===== EXAMPLE USAGE ===== | |
exampleSource :: Text | |
exampleSource = T.unlines | |
[ "module example" | |
, "export [factorial, fibonacci]" | |
, "" | |
, "factorial(0) -> 1" | |
, "factorial(N) -> N * factorial(N - 1)" | |
, "" | |
, "fibonacci(0) -> 0" | |
, "fibonacci(1) -> 1" | |
, "fibonacci(N) -> fibonacci(N - 1) + fibonacci(N - 2)" | |
, "" | |
, "main() ->" | |
, " let Result = factorial(5) in" | |
, " spawn(io, format, [\"Factorial: ~p~n\", [Result]])" | |
] | |
-- For testing | |
testCompiler :: IO () | |
testCompiler = do | |
case compileToZig exampleSource of | |
Left err -> putStrLn $ "Compilation error: " ++ T.unpack err | |
Right zigCode -> putStrLn $ T.unpack zigCode |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment