Skip to content

Instantly share code, notes, and snippets.

@baijum
Last active July 1, 2025 15:39
Show Gist options
  • Save baijum/ed960b7b40ce7370e9187ef64c776d45 to your computer and use it in GitHub Desktop.
Save baijum/ed960b7b40ce7370e9187ef64c776d45 to your computer and use it in GitHub Desktop.
Asthra Programming Language Grammar
# Asthra Programming Language Grammar
Program <- PackageDecl ImportDecl* TopLevelDecl* # Entry point: package + imports + declarations
PackageDecl <- 'package' SimpleIdent ';' # Package name (Go-style organization)
ImportDecl <- 'import' ImportPath ImportAlias? ';' # Import external modules/packages with optional aliases
ImportPath <- STRING # Validated by build system: stdlib/*, github.com/*, gitlab.com/*, ./relative
ImportAlias <- 'as' SimpleIdent # Import alias for conflict resolution: import "path" as Name
TopLevelDecl <- (HumanReviewTag / SecurityTag / SemanticTag)* VisibilityDecl # Annotated declarations
VisibilityDecl <- VisibilityModifier (FunctionDecl / StructDecl / EnumDecl / ExternDecl / ConstDecl / ImplDecl) # Explicit public/private visibility required
VisibilityModifier <- 'pub' / 'priv' # Explicit public/private visibility (no implicit defaults)
# Immutable by Default: Explicit Mutability Everywhere
# - All variables are immutable by default using 'let' keyword
# - 'mut' keyword required for any mutable local variable anywhere in the code
# - Function parameters are always immutable (no 'mut' allowed)
# - Struct fields follow the containing variable's mutability
# - This provides maximum AI generation clarity with minimal complexity
# Annotation system for AI generation, security, and FFI safety
SemanticTag <- '#[' IDENT '(' AnnotationParams ')' ']' / '#[' 'non_deterministic' ']' # General annotations + non-deterministic marker
AnnotationParams<- AnnotationParam (',' AnnotationParam)* / 'none' # Key-value annotation parameters: explicit params or none marker
AnnotationParam <- IDENT '=' AnnotationValue # Parameter: name = value
AnnotationValue <- STRING / IDENT / INT / BOOL # Supported parameter value types
# NOTE: alternatives inside the tag literals use the PEG ordered-choice operator '/', consistent with every other rule in this grammar.
HumanReviewTag <- '#[human_review(' ('low' / 'medium' / 'high') ')]' # AI-human collaboration priority levels
SecurityTag <- '#[' ('constant_time' / 'volatile_memory') ']' # Security-critical code annotations
OwnershipTag <- '#[ownership(' ('gc' / 'c' / 'pinned') ')]' # Memory management strategy annotations
FFITransferTag <- '#[' ('transfer_full' / 'transfer_none') ']' # FFI ownership transfer semantics
LifetimeTag <- '#[borrowed]' # FFI borrowed reference annotation
SafeFFIAnnotation <- FFITransferTag / LifetimeTag # Safe FFI annotation (single, mutually exclusive)
# Const declarations for compile-time constants
ConstDecl <- 'const' SimpleIdent ':' Type '=' ConstExpr ';' # Compile-time constant declaration with required type annotation
# NOTE(review): BinaryOp and UnaryOp are referenced below but are not defined anywhere in the
# visible grammar - confirm where they are specified.
# NOTE(review): ConstExpr is left-recursive through BinaryConstExpr, and under PEG ordered choice
# the earlier alternatives (Literal / SimpleIdent) succeed first, so BinaryConstExpr looks
# unreachable for a PEG engine as written - verify against the hand-written parser.
ConstExpr <- Literal / SimpleIdent / BinaryConstExpr / UnaryConstExpr / SizeOf # Compile-time evaluable expressions
BinaryConstExpr<- ConstExpr BinaryOp ConstExpr # Binary operations on constants
UnaryConstExpr <- UnaryOp ConstExpr # Unary operations on constants
# Mutability control for variables and parameters
MutModifier <- 'mut' # Mutability keyword for local variables only
# Function and external function declarations
FunctionDecl <- 'fn' SimpleIdent '(' ParamList ')' '->' SafeFFIAnnotation? Type Block # Function with explicit parameter list required
ExternDecl <- 'extern' STRING? 'fn' SimpleIdent '(' ExternParamList ')' '->' SafeFFIAnnotation? Type ';' # Foreign function interface with explicit parameters
StructDecl <- OwnershipTag? 'struct' SimpleIdent TypeParams? '{' StructContent '}' # Struct with explicit content required
StructContent <- StructFieldList / 'none' # Explicit struct content: fields or none marker
StructFieldList<- StructField (',' StructField)* # Struct fields with consistent comma separators
StructField <- VisibilityModifier? SimpleIdent ':' Type # Named field with optional visibility (no MutModifier allowed)
# Enum declarations with generics and variants
EnumDecl <- OwnershipTag? 'enum' SimpleIdent TypeParams? '{' EnumContent '}' # Enum with explicit content required
EnumContent <- EnumVariantList / 'none' # Explicit enum content: variants or none marker
EnumVariantList<- EnumVariant (',' EnumVariant)* # Enum variants with consistent comma separators
EnumVariant <- VisibilityModifier? SimpleIdent EnumVariantData? # Variant with optional associated data
EnumVariantData<- '(' Type ')' / '(' TypeList ')' # Single type or tuple of types
TypeParams <- '<' TypeParam (',' TypeParam)* '>' # Generic type parameters
TypeParam <- SimpleIdent # Type parameter name
TypeList <- Type (',' Type)* # Comma-separated type list
# Implementation blocks for struct methods (object-oriented features)
ImplDecl <- 'impl' SimpleIdent '{' MethodDecl* '}' # Method implementations for structs (requires visibility modifier)
MethodDecl <- VisibilityModifier 'fn' SimpleIdent '(' MethodParamList ')' '->' Type Block # Instance methods with explicit parameters required
MethodParamList<- SelfParam (',' Param)* / ParamList # Methods with 'self' or regular functions
SelfParam <- 'self' # Instance method self parameter (always immutable)
# Function parameters with FFI annotations (always immutable - no MutModifier allowed)
ParamList <- Param (',' Param)* / 'none' # Function parameters: explicit params or none marker
Param <- SafeFFIAnnotation? SimpleIdent ':' Type # Parameter with optional single FFI annotation (no MutModifier allowed)
ExternParamList<- ExternParam (',' ExternParam)* / 'none' # External function parameters: explicit params or none marker
ExternParam <- SafeFFIAnnotation? SimpleIdent ':' Type # External parameter with single FFI annotation (no MutModifier allowed)
# Type system with built-in and composite types
Type <- BaseType / SliceType / FixedArrayType / StructType / EnumType / PtrType / ResultType / OptionType / TaskHandleType / TupleType / NeverType # All supported types
TupleType <- '(' Type ',' Type (',' Type)* ')' # Tuple types: (T, U) or (T, U, V) - minimum 2 elements
NeverType <- 'Never' # Never type for functions that don't return
BaseType <- 'int' / 'float' / 'bool' / 'string' / 'void' / 'usize' / 'isize' / # Primitive types (void preserved as legitimate type)
'u8' / 'i8' / 'u16' / 'i16' / 'u32' / 'i32' / 'u64' / 'i64' / 'u128' / 'i128' / 'f32' / 'f64'
FixedArrayType <- '[' ConstExpr ']' Type # Fixed-size arrays: [size]Type
SliceType <- '[' ']' Type # Dynamic arrays: []Type
StructType <- SimpleIdent TypeArgs? # User-defined structs with optional generics
EnumType <- SimpleIdent TypeArgs? # User-defined enums with optional generics
TypeArgs <- '<' Type (',' Type)* '>' # Generic type arguments
PtrType <- '*mut' Type / '*const' Type # Mutable and immutable pointers
ResultType <- 'Result' '<' Type ',' Type '>' # Error handling: Result<Success, Error>
OptionType <- 'Option' '<' Type '>' # Optional values: Option<T>
TaskHandleType <- 'TaskHandle' '<' Type '>' # Concurrency task handles: TaskHandle<T>
# Code blocks and statements
Block <- '{' Statement* '}' # Code block with multiple statements
UnsafeBlock <- 'unsafe' Block # Unsafe operations block for FFI/raw pointers (orthogonal to mutability)
Statement <- VarDecl / AssignStmt / IfLetStmt / IfStmt / ForStmt / ReturnStmt / SpawnStmt / ExprStmt / UnsafeBlock / MatchStmt / BreakStmt / ContinueStmt # All statement types
VarDecl <- 'let' MutModifier? SimpleIdent ':' Type OwnershipTag? '=' Expr ';' # Variable declaration with optional mutability modifier
AssignStmt <- MutableLValue '=' Expr ';' # Assignment to variables, fields, or array elements (must be mutable)
MutableLValue <- SimpleIdent LValueSuffix* # Left-hand side of assignments (validated for mutability in semantic analysis)
IfStmt <- 'if' Expr Block ('else' (Block / IfStmt))? # Conditional statements with optional else/else if
IfLetStmt <- 'if' 'let' Pattern '=' Expr Block ('else' Block)? # Pattern matching in conditionals
ForStmt <- 'for' SimpleIdent 'in' Expr Block # Iterator-based loops
ReturnStmt <- 'return' Expr ';' # Function return with required expression - eliminates AI ambiguity
ExprStmt <- Expr ';' # Expression as statement
SpawnStmt <- 'spawn' PostfixExpr ';' / 'spawn_with_handle' SimpleIdent '=' PostfixExpr ';' # Tier 1 concurrency: spawn and spawn with handle capture
MatchStmt <- 'match' Expr '{' MatchArm* '}' # Pattern matching statement
MatchArm <- Pattern '=>' Block # Match arm with pattern and code block
BreakStmt <- 'break' ';' # Exit current loop
ContinueStmt <- 'continue' ';' # Skip to next iteration
# Pattern matching system for destructuring
Pattern <- EnumPattern / StructPattern / TuplePattern / SimpleIdent # Supported pattern types
TuplePattern <- '(' Pattern ',' Pattern (',' Pattern)* ')' # Tuple patterns: (pattern1, pattern2) or (pattern1, pattern2, pattern3) - minimum 2 elements
EnumPattern <- SimpleIdent '.' SimpleIdent ('(' PatternArgs ')')? # Enum variant matching with optional extraction
PatternArgs <- Pattern (',' Pattern)* / 'none' # Nested patterns for complex destructuring: explicit patterns or none marker
StructPattern <- SimpleIdent TypeArgs? '{' FieldPattern (',' FieldPattern)* '}' # Struct destructuring with optional generics
FieldPattern <- SimpleIdent ':' FieldBinding # Always explicit field binding syntax
FieldBinding <- SimpleIdent / '_' # Binding name or ignore marker
# Expression system with proper operator precedence (fixed shift operator precedence)
Expr <- LogicOr # Entry point for expressions
LogicOr <- LogicAnd ('||' LogicAnd)* # Logical OR (lowest precedence)
LogicAnd <- BitwiseOr ('&&' BitwiseOr)* # Logical AND
BitwiseOr <- BitwiseXor ('|' BitwiseXor)* # Bitwise OR
BitwiseXor <- BitwiseAnd ('^' BitwiseAnd)* # Bitwise XOR
BitwiseAnd <- Equality ('&' Equality)* # Bitwise AND
Equality <- Relational (('==' / '!=') Relational)* # Equality comparison (separated from relational)
Relational <- Shift (('<' / '<=' / '>' / '>=') Shift)* # Relational comparison (separated from equality)
Shift <- Add (('<<' / '>>') Add)* # Bitwise shift operations (fixed precedence)
Add <- Mult (('+' / '-') Mult)* # Addition and subtraction
Mult <- Unary (('*' / '/' / '%') Unary)* # Multiplication, division, modulo
Unary <- ('await' PostfixExpr) / (UnaryPrefix PostfixExpr) # Unary operations
UnaryPrefix <- LogicalPrefix? PointerPrefix? # Restricted unary operators (prevents !!x, **ptr)
LogicalPrefix <- ('!' / '-' / '~') # Logical not, arithmetic negation, bitwise complement
PointerPrefix <- ('*' / '&') # Dereference and address-of operators
PostfixExpr <- Primary PostfixSuffix* # Postfix operations (highest precedence)
PostfixSuffix <- '(' ArgList ')' / '.' FieldOrIndex / '[' SliceOrIndexExpr ']' # Function calls, field/tuple access, slice/index
FieldOrIndex <- SimpleIdent / TupleIndex # Field name or tuple element index
TupleIndex <- [0-9]+ # Numeric index for tuple element access: .0, .1, .2, etc.
SliceOrIndexExpr <- SliceExpr / Expr # Go-style slicing or indexing
SliceExpr <- SliceBounds # Slice bounds specification
# The four alternatives are ordered longest-first so that 'start:end' is tried before 'start:'.
SliceBounds <- Expr ':' Expr / # start:end (slice from start up to end)
Expr ':' / # start: (slice from start through the last element)
':' Expr / # :end (slice from the first element up to end)
':' # : (full slice copy)
# Ordering matters under PEG ordered choice: AssociatedFuncCall must be tried before SimpleIdent,
# otherwise 'Type::function()' is consumed as the bare identifier 'Type' and the '::' call form is
# never reached. For the same reason GenericType precedes SimpleIdent inside AssociatedFuncCall,
# so that 'Type<T>::function()' is not cut short after 'Type'.
Primary <- Literal / EnumConstructor / AssociatedFuncCall / SimpleIdent / '(' Expr ')' / SizeOf # Primary expressions (SimpleIdent includes predeclared identifiers)
AssociatedFuncCall <- (GenericType / SimpleIdent) '::' SimpleIdent '(' ArgList ')' # Static method calls: Type::function() or Type<T>::function()
GenericType <- SimpleIdent TypeArgs # Generic type for associated function calls
EnumConstructor<- SimpleIdent '.' SimpleIdent ('(' ArgList ')')? # Enum variant construction: Enum.Variant(args)
SizeOf <- 'sizeof' '(' Type ')' # Compile-time size calculation
# Function arguments and assignable expressions
ArgList <- Expr (',' Expr)* / 'none' # Function call arguments: explicit args or none marker
LValue <- SimpleIdent LValueSuffix* # Left-hand side of assignments
LValueSuffix <- '.' SimpleIdent / '[' Expr ']' / '*' # Field access, array indexing, pointer dereference
# Literal values and data structures
# FLOAT is tried before INT: with ordered choice, INT would otherwise match the integer part of
# '1.5' and the FLOAT alternative could never be selected for it. Plain integers still fall
# through to INT because FLOAT requires a '.'.
Literal <- FLOAT / INT / BOOL / STRING / CHAR / ArrayLiteral / StructLiteral / TupleLiteral # All literal types
TupleLiteral <- '(' Expr ',' Expr (',' Expr)* ')' # Tuple literals: (expr1, expr2) or (expr1, expr2, expr3) - minimum 2 elements
ArrayLiteral <- '[' ArrayContent ']' # Array literals: [1, 2, 3], [value; count], or [none]
ArrayContent <- RepeatedElements / ArrayElements / 'none' # Array initialization patterns or none marker
RepeatedElements <- ConstExpr ';' ConstExpr # Rust-style repeated elements: [value; count]
ArrayElements <- Expr (',' Expr)* # Comma-separated elements: [elem1, elem2, ...]
StructLiteral <- SimpleIdent TypeArgs? '{' FieldInit (',' FieldInit)* '}' # Struct construction with optional generics
FieldInit <- SimpleIdent ':' Expr # Struct field initialization: field: value
# Lexical rules and identifiers
IDENT <- [a-zA-Z_][a-zA-Z0-9_]* # Base identifier pattern
# =============================================================================
# RESERVED KEYWORDS
# =============================================================================
#
# Reserved keywords are part of the language syntax and CANNOT be used as user identifiers.
# These are different from predeclared identifiers (see section below) which CAN be shadowed.
#
# DESIGN PRINCIPLE: Keywords vs Predeclared Identifiers
# - KEYWORDS: Structural language elements (if, fn, let, etc.) - cannot be redefined
# - PREDECLARED: Utility functions (log, range, etc.) - can be shadowed by user code
#
# This distinction provides the right balance of language stability and user flexibility.
#
# Alternatives are matched in order; a keyword that is a prefix of another keyword must come
# AFTER the longer one ('spawn_with_handle' before 'spawn') so the longer keyword is recognised.
ReservedKeyword <- 'as' # Import aliasing
/ 'fn' # Function declaration
/ 'let' # Variable declaration
/ 'const' # Constant declaration
/ 'mut' # Mutability modifier (immutable by default)
/ 'if' # Conditional statements
/ 'else' # Conditional alternative
/ 'enum' # Enum declaration
/ 'match' # Pattern matching
/ 'return' # Function return
/ 'true' # Boolean literal
/ 'false' # Boolean literal
/ 'struct' # Struct declaration
/ 'extern' # Foreign function interface
/ 'spawn_with_handle' # Task spawning with result handle (Tier 1 concurrency)
/ 'spawn' # Task spawning (Tier 1 concurrency)
/ 'unsafe' # Unsafe code blocks (orthogonal to mutability - for pointer safety only)
/ 'package' # Package declaration
/ 'import' # Module importing
/ 'for' # Loop statements
/ 'in' # Iterator keyword
/ 'await' # Async result waiting (Tier 1 concurrency)
/ 'break' # Loop break statement
/ 'continue' # Loop continue statement
/ 'sizeof' # Compile-time size operator
/ 'pub' # Public visibility modifier
/ 'priv' # Private visibility modifier (explicit visibility)
/ 'impl' # Implementation blocks
/ 'self' # Method self parameter
/ 'void' # Type for functions returning nothing (preserved for legitimate usage)
/ 'none' # Structural absence marker (semantic clarity)
/ 'Result' # Built-in Result<T,E> type
/ 'Option' # Built-in Option<T> type
/ 'TaskHandle' # Built-in TaskHandle<T> type
# =============================================================================
# USER IDENTIFIERS
# =============================================================================
#
# SimpleIdent defines valid user identifiers by excluding reserved keywords.
# This production allows predeclared identifiers (log, range, etc.) to be used as regular
# identifiers, enabling shadowing while preventing conflicts with language keywords.
#
# The lookahead rejects a reserved keyword only when it is a complete word, i.e. when it is
# NOT followed by another identifier character. Without this boundary check, identifiers that
# merely begin with a keyword (for example 'format', 'index', 'letter', 'assert') would be
# rejected as well.
SimpleIdent <- !(ReservedKeyword ![a-zA-Z0-9_]) IDENT # User identifiers (excludes exact reserved keywords, allows predeclared)
# Immutable-by-Default Semantic Rules:
# - Variables are immutable by default: 'let x: i32 = 42;' (cannot be reassigned)
# - Explicit mutability required: 'let mut x: i32 = 42;' (can be reassigned anywhere)
# - Function parameters are always immutable: fn process(data: string) -> void (no 'mut' allowed)
# - Struct fields have no explicit mutability (follow containing variable's mutability)
# - 'unsafe' blocks are orthogonal to mutability (for FFI pointer safety, not mutation control)
# - This eliminates AI confusion about state tracking while maintaining C interoperability
# Semantic Clarity: void vs none distinction
# - 'void' is ONLY used as a type for functions returning nothing: fn main() -> void
# - 'none' is ONLY used as a structural marker for absence of elements: fn main(none), struct Empty { none }
# This eliminates parser ambiguity and improves AI generation reliability by creating clear semantic boundaries
# Comment syntax (handled by lexer in practice)
Comment <- SingleLineComment / MultiLineComment # Both comment styles supported
# '!' is the PEG not-predicate (as used by RawContent, StringContent and CHAR): consume any
# character as long as the terminator does not start at the current position.
SingleLineComment <- '//' (!'\n' .)* '\n'? # C++-style comments
MultiLineComment <- '/*' (!'*/' .)* '*/' # C-style block comments
# String literals with multi-line support
STRING <- MultiLineString / RegularString # Support both single-line and multi-line strings
RegularString <- '"' StringContent* '"' # Regular single-line string literals
MultiLineString<- RawString / ProcessedString # Two multi-line variants
RawString <- 'r"""' RawContent* '"""' # Raw multi-line (no escape processing)
ProcessedString<- '"""' StringContent* '"""' # Processed multi-line (with escape sequences)
RawContent <- (!('"""') .) # Any character except closing delimiter
StringContent <- EscapeSequence / (!'"' .) # String characters or escape sequences
EscapeSequence <- '\\' ('n' / 't' / 'r' / '\\' / '"' / '\'' / '0') # Standard escape sequences
# Numeric and other literals
INT <- HexLiteral / BinaryLiteral / OctalLiteral / DecimalLiteral
HexLiteral <- ('0x' / '0X') [0-9a-fA-F]+ # Hexadecimal: 0xFF, 0x123ABC
BinaryLiteral <- ('0b' / '0B') [01]+ # Binary: 0b1010, 0B11110000
OctalLiteral <- '0o' [0-7]+ # Octal: 0o777, 0o123 (explicit only)
DecimalLiteral <- [0-9]+ # Decimal: 123, 42 (unchanged)
FLOAT <- [0-9]+ '.' [0-9]* / '.' [0-9]+ # Floating-point literals
BOOL <- 'true' / 'false' # Boolean literals
CHAR <- "'" (EscapeSequence / (!"'" .)) "'" # Character literals with escape support
# =============================================================================
# PREDECLARED IDENTIFIERS
# =============================================================================
#
# Following Go's design philosophy, Asthra provides predeclared identifiers that are
# automatically available in all packages without explicit import statements. These
# identifiers provide essential utility functions and follow these design principles:
#
# 1. SHADOWING ALLOWED: All predeclared identifiers can be shadowed by user declarations
# This provides flexibility while maintaining convenience for common operations
#
# 2. SEMANTIC-LEVEL RESOLUTION: These are NOT keywords in the grammar - they are resolved
# during semantic analysis and registered in the global symbol table
#
# 3. TYPE-SAFE: All predeclared functions have well-defined type signatures and undergo
# full semantic analysis including type checking and overload resolution
#
# 4. OVERLOADING SUPPORT: Functions like 'range' can have multiple signatures, handled
# naturally through semantic analysis rather than complex grammar productions
#
# AVAILABLE PREDECLARED IDENTIFIERS:
#
# Utility Functions:
# - log(message: string) -> void # Debug/diagnostic output to stderr
# - panic(message: string) -> Never # Program termination with error message
# - exit(status: i32) -> Never # Program termination with exit code
# - args() -> []string # Access command-line arguments
# - len(slice: []T) -> usize # Get length of slice or array
#
# Range Functions (overloaded):
# - range(end: i32) -> []i32 # Generate [0, 1, 2, ..., end-1]
# - range(start: i32, end: i32) -> []i32 # Generate [start, start+1, ..., end-1]
#
# Iterator Functions:
# - infinite() -> InfiniteIterator # Generate infinite iterator (requires #[non_deterministic])
#
# USAGE EXAMPLES:
#
# Basic usage (predeclared functions work immediately):
# log("Hello, World!");
# for i in range(5) { log("Count: " + string(i)); }
# let arguments = args();
#
# Shadowing examples (user can override predeclared functions):
# fn log(level: string, msg: string) -> void { /* custom logging */ }
# log("INFO", "Using custom log function"); // Uses user-defined version
# let nums = range(3, 7); // Still uses predeclared range(i32, i32)
#
# IMPLEMENTATION NOTES:
# - Predeclared identifiers are registered during semantic analyzer initialization
# - They are stored in src/analysis/semantic_builtins.c as PredeclaredIdentifier structs
# - Runtime implementations are in the asthra runtime system (asthra_runtime_*)
# - No parser changes needed - treated as regular identifiers during parsing
# - Overload resolution happens during semantic analysis, not parsing
#
# This production is never used in parsing, only for documentation and tooling:
PredeclaredIdent <- 'log' / 'range' / 'infinite' / 'panic' / 'exit' / 'args' / 'len'

Hello World

// A program starts with a package declaration (see the Program / PackageDecl rules).
package main;

// Public function `main`; `none` is the explicit marker for an empty parameter list.
pub fn main(none) -> i32 {
    // `log` is a predeclared identifier, so it is available without an import.
    log("Hello, World!");
    // `return` always takes an expression; here the function returns 0.
    return 0;
}

Condition

// A program starts with a package declaration (see the Program / PackageDecl rules).
package main;

// Public function `main`; `none` is the explicit marker for an empty parameter list.
pub fn main(none) -> i32 {
    // Immutable local (no `mut`); the type annotation is required by the VarDecl rule.
    let x: i32 = 10;
    // The condition is written without parentheses and both branches are blocks.
    if x > 5 {
        log("x is greater than 5");
    } else {
        log("x is not greater than 5");
    }
    // `return` always takes an expression; here the function returns 0.
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment