-
-
Save volovikariel/5ccac501b3ecedcf87ae2fb217949cc8 to your computer and use it in GitHub Desktop.
A simple implementation of a recursive-descent parser for a language of boolean expressions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A simple implementation of a recursive-descent parser for a language of boolean expressions. | |
Forked from: https://gist.github.com/iafisher/6f53ce4df29c91ac9276c13a113ccf9f | |
This fork aims to allow for strings instead of 0 and 1 symbols, and these strings themselves are then to be evaluated. | |
e.g: Instead of (0 or 1), you could have (is_happy or is_sad). | |
Additionally, allow for multiple operators within a single set of parentheses, not only boolean. | |
e.g: Allow for (0 or 1 or 2 or 3 or 4 or 5 ...) | |
Note that only a single operator type is allowed however, as it'd be ambiguous otherwise. | |
e.g of not allowed: (0 or 1 and 2) | |
""" | |
import re | |
FUNC_BY_OP = { | |
"&": lambda x, y: x and y, | |
"|": lambda x, y: x or y, | |
} | |
OPS = list(FUNC_BY_OP.keys()) | |
SYMBOLS = OPS + ["(", ")"] | |
def safe_pop(stack: list): | |
try: | |
return stack.pop(0) | |
except IndexError: | |
raise SyntaxError("Unexpected end of expression") | |
def eval_str(expr, truthy_mapping): | |
"""All binary operators must be wrapped in parentheses, even when it would be unambiguous to omit them.""" | |
tokens = tokenize(expr) | |
ast = parse_expr(tokens) | |
return eval_ast(ast, truthy_mapping) | |
def eval_ast(ast, truthy_mapping: dict[str, int]): | |
"""Evaluate an AST as returned by match_expr.""" | |
is_terminal = not isinstance(ast, list) | |
if is_terminal: | |
val = ast | |
# If it's something like (0 | 0 | 1), it'll be broken down into (0 | 0) | 1 | |
# the result of the first (0 | 0) will be either True or False, so we just return that | |
if isinstance(val, bool): | |
return val | |
if val not in truthy_mapping.keys(): | |
raise SyntaxError(f"Operand '{val}' was not provided a True/False value") | |
return truthy_mapping[val] | |
lhs, op, rhs = safe_pop(ast), safe_pop(ast), safe_pop(ast) | |
if op not in OPS: | |
raise ValueError(f"Unknown operator '{op}'") | |
func = FUNC_BY_OP[op] | |
res = func(eval_ast(lhs, truthy_mapping), eval_ast(rhs, truthy_mapping)) | |
while len(ast) > 0: | |
# If op is short circuitable | |
# e.g: if it's an OR operation and we already have truthy value, just return true | |
# e.g: if it's an AND operation and we already have a negative value, just return false | |
if (op in ["|"] and res) or (op in ["&"] and not res): | |
return res | |
op = safe_pop(ast) | |
rhs = safe_pop(ast) | |
res = func(eval_ast(res, truthy_mapping), eval_ast(rhs, truthy_mapping)) | |
return res | |
def parse_expr(tokens: list[str]): | |
"""Given a list of tokens (as strings), return the abstract syntax tree as parsed by the grammar | |
START -> EXPR | |
EXPR -> VALUE | |
EXPR -> ( EXPR (OP EXPR)+ ) | |
OP -> & | |
OP -> | | |
VALUE -> any string | |
""" | |
# If it's a terminal value | |
if isinstance(tokens, str) and tokens not in SYMBOLS: | |
return tokens | |
token: list = safe_pop(tokens) | |
result = [] | |
if token not in SYMBOLS: | |
# It's a terminal value | |
return token | |
if token == "(": | |
# If there's not a lhs, op, rhs, closing parenthesis, then it's a syntax error | |
if len(tokens) < 4: | |
raise SyntaxError( | |
f"Expected '(', lhs, op, rhs, ')', but got '(', {",".join(tokens)}" | |
) | |
lhs = parse_expr(tokens) | |
op = safe_pop(tokens) | |
if op not in OPS: | |
raise SyntaxError(f'expected one of [ {",".join(OPS)} ], got "{op}"') | |
rhs = parse_expr(tokens) | |
result.extend([lhs, op, rhs]) | |
# if there's more to parse | |
while len(tokens) > 0 and (next_token := safe_pop(tokens)): | |
if next_token == op: | |
# Keep going | |
rhs = parse_expr(tokens) | |
result.extend([op, rhs]) | |
elif next_token in OPS and next_token != op: | |
raise SyntaxError( | |
f"Multiple operators {op} and {next_token} present on single level. Must all be the same." | |
) | |
elif next_token == ")": | |
return result | |
else: | |
# Undo the pop | |
tokens.insert(0, next_token) | |
return result | |
return result | |
else: | |
raise SyntaxError(f"Unexpected token {token}, expected '('") | |
def tokenize(expr): | |
"""Return a list of tokens from the expression.""" | |
# Remove all whitespace | |
# ((( foo|bar) &baz)| bar) -> (((foo|bar)&baz)|bar) | |
expr = re.sub(r"\s+", "", expr) | |
# Pad all operators/parentheses with spaces | |
# (((foo|bar)&baz)|bar) -> ( ( ( foo | bar ) & baz ) | bar ) | |
for symbol in SYMBOLS: | |
expr = expr.replace(symbol, f" {symbol} ") | |
return expr.split() | |
# Examples: | |
truthy_mapping = {"0": False, "1": True} | |
# False | |
eval_str("0", truthy_mapping) | |
# True | |
eval_str("(0|1)", truthy_mapping) | |
# True | |
eval_str("((0|1)|1)", truthy_mapping) | |
# False | |
eval_str("((0&1)|(0&1))", truthy_mapping) | |
# Your true/values heres | |
truthy_mapping.update({"COMP248": True, "COMP249": False}) | |
while True: | |
try: | |
expr = input(">>> ") | |
except EOFError: | |
break | |
try: | |
print(eval_str(expr, truthy_mapping)) | |
except SyntaxError as e: | |
print("Error:", e) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment