diff options
| author | Owen Jacobson <owen@grimoire.ca> | 2017-11-13 01:00:33 -0500 |
|---|---|---|
| committer | Owen Jacobson <owen@grimoire.ca> | 2017-11-13 01:01:34 -0500 |
| commit | 5cc96a0fb06fa7d86563f4cb64e5fa9d4f6a09f9 (patch) | |
| tree | 40052668ffe030452b45e3a2d6be8d8fc24acdee | |
| parent | 6a635660bd7b47238642d4a552782687352555ac (diff) | |
Big-ass coding binge presents: a Lisp.
This implements a continuation-passing interpreter, which means we get tail calls ferfree. I stopped short of implementing call/cc, because I don't think we need it, but we can get there if we have to.
| -rw-r--r-- | actinide/__init__.py | 41 | ||||
| -rw-r--r-- | actinide/builtin.py | 44 | ||||
| -rw-r--r-- | actinide/environment.py | 30 | ||||
| -rw-r--r-- | actinide/evaluator.py | 193 | ||||
| -rw-r--r-- | actinide/ports.py | 6 | ||||
| -rw-r--r-- | actinide/reader.py | 131 | ||||
| -rw-r--r-- | actinide/symbol_table.py | 7 | ||||
| -rw-r--r-- | actinide/tokenizer.py | 7 | ||||
| -rw-r--r-- | actinide/types.py | 218 | ||||
| -rw-r--r-- | primer.py | 37 | ||||
| -rw-r--r-- | tests/forms.py | 76 | ||||
| -rw-r--r-- | tests/programs.py | 57 | ||||
| -rw-r--r-- | tests/test_evaluator.py | 18 | ||||
| -rw-r--r-- | tests/test_ports.py | 10 | ||||
| -rw-r--r-- | tests/test_reader.py | 28 | ||||
| -rw-r--r-- | tests/test_tokenizer.py | 4 |
16 files changed, 892 insertions, 15 deletions
diff --git a/actinide/__init__.py b/actinide/__init__.py index e69de29..afa27d8 100644 --- a/actinide/__init__.py +++ b/actinide/__init__.py @@ -0,0 +1,41 @@ +from . import builtin, ports, symbol_table, reader, evaluator, types + +class Session(object): + def __init__(self): + self.symbols = symbol_table.SymbolTable() + self.environment = evaluator.Environment() + + def read(self, port): + if types.string_p(port): + port = ports.string_to_input_port(port) + return reader.read(port, self.symbols) + + def eval(self, form): + cps = evaluator.eval(form, self.environment, self.symbols, None) + return evaluator.run(cps) + + def bind(self, symb, value): + self.environment[self.symbol(symb)] = value + + def bind_void(self, fn): + return self.bind_builtin(builtin.wrap_void(fn)) + + def bind_fn(self, fn): + return self.bind_builtin(builtin.wrap_fn(fn)) + + def bind_builtin(self, fn): + name = builtin.lisp_name(fn) + if name is None: + raise ValueError("Anonymous functions must be bound using `bind`") + symb = self.symbol(name) + self.bind(symb, fn) + return symb + + def get(self, symb): + symb = self.symbol(symb) + return self.environment.find(symb) + + def symbol(self, symb): + if types.string_p(symb): + symb = types.symbol(symb, self.symbols) + return symb diff --git a/actinide/builtin.py b/actinide/builtin.py new file mode 100644 index 0000000..06623b3 --- /dev/null +++ b/actinide/builtin.py @@ -0,0 +1,44 @@ +# ## Tools for binding Python functions into a lisp environment. + +from functools import wraps + +# Derives the lisp name of a python function or method handle, as follows: +# +# * A trailing '_p' in the Python name is converted into a question mark in the +# lisp name. +# +# * Any remaining underscores in the Python name are converted to dashes in the +# lisp name. +# +# This only applies to named functions. Python lambdas and non-function +# callables do not have names. +def lisp_name(fn): + name = fn.__name__ + if name == '<lambda>': + return None + + # Trailing _p is a predicate, translate to ? + if name.endswith('_p'): + name = name[:-2] + '?' + + # Remaining underscores are dashes + name = name.replace('_', '-') + + return name + +# Wraps a function which returns no values in a function which returns an empty +# tuple. +def wrap_void(fn): + @wraps(fn) + def wrapper(*args): + fn(*args) + return () + return wrapper + +# Wraps a function which returns one value in a function which returns a +# one-valued tuple. +def wrap_fn(fn): + @wraps(fn) + def wrapper(*args): + return fn(*args), + return wrapper diff --git a/actinide/environment.py b/actinide/environment.py new file mode 100644 index 0000000..29399fe --- /dev/null +++ b/actinide/environment.py @@ -0,0 +1,30 @@ +# Raised if a binding cannot be found. +class BindingError(Exception): + pass + +# A lookup table binding symbols to values. This may have a parent environment, +# in which case lookups will look through to the parent environment if they are +# not found in the current environment. This allows nested scopes. +class Environment(dict): + # Creates an environment, optionally setting initial values and optionally + # adding a parent to look in for values not found in this environment. + def __init__(self, bindings=(), parent=None): + self.parent = parent + self.update(bindings) + + # Look up a binding in this environment, or in any parent environment. + # Unlike ``[]``, this will continue into any parent environments, raising an + # exception if the name cannot be found in any of them. + # + # The value from the innermost environment containing the name will be + # returned. + def find(self, name): + if name in self: + return self[name] + if self.parent != None: + return self.parent.find(name) + raise BindingError(f'Variable {name} not bound') + + # Sets a value in the current environment. + def define(self, name, value): + self[name] = value diff --git a/actinide/evaluator.py b/actinide/evaluator.py new file mode 100644 index 0000000..49587e2 --- /dev/null +++ b/actinide/evaluator.py @@ -0,0 +1,193 @@ +from .environment import * + +# Circular import. Hard to avoid: `eval` calls `lambda_`, `lambda_` eventually +# calls `Procedure`, and `Procedure` calls `eval`. We indirect the call through +# the module object to avoid problems with import order. +from . import types as t + +# ## EVALUATION +# +# The following system implements a continuation-passing interpreter for +# Actinide. It is heavily inspired by the redex/continuation system described in +# <https://docs.racket-lang.org/reference/eval-model.html> for the Racket +# language. +# +# The call stack of an Actinide program is implemented as a chain of +# continuation functions, which grows and shrinks as evaluation proceeds. +# Because the evaluator itself is tail-recursive, tail calls are free in +# Actinide - they do not cause the call stack to grow. +# +# In principle, this could expose ``call/cc``, but the need for that primitive +# hasn't come up. +# +# Evaluating a form in this system requires both an environment (from the +# ``environment`` module) and a symbol table (from the ``symbol_table`` module). +# Callers are encouraged to use the ``Session`` system exposed in the root of +# this package, instead of calling the invoker directly: a session manages these +# resources automaticallty. The more intrepid user can manually compile an +# arbitrary expression using the ``eval`` continuation factory, described below. +# +# If the interpreter encounters an error during evaluation, the resulting Python +# exception bubbles up out of the interpreter and destroys the state of the +# computation. + +# Reduce a continuation to its final value. +# +# This iteratively calls the current continuation with the current arguments +# until the current continuation is None, then exits, returning the final +# arguments. +# +# This trampoline exists to deal with the absence of tail call optimizations in +# Python. By returning the next continuation rather than invoking it, we avoid +# growing the Python call stack without bound. +# +# The result of evaluating a continuation is always a Python tuple. For +# expressions, this tuple contains the value(s) produced by the expression. For +# forms which do not produce a value, this returns the empty tuple. +def run(continuation, args=()): + while continuation is not None: + continuation, args = continuation(*args) + return args + +# ## FLAT CONTINUATIONS +# +# These continuation factories and transformers produce continuations which +# receive already-evaluated values, and which produce evaluated results. + +# Returns a continuation which yields a single value, verbatim, and chains to a +# known target continuation. This implements evaluation for literals. +def literal(value, continuation): + return lambda: (continuation, (value,)) + +# Returns a continuation which looks up a symbol in an environment, yields the +# result, and chains to a known target continuation. This implements evaluation +# for variable lookups. +def symbol(symb, environment, continuation): + return lambda: (continuation, (environment.find(symb),)) + +# Returns a continuation which yields a newly-created procedure, and chains to a +# known target continuation. This implements evaluation for the tail of a lambda +# form. (The head of the lambda form must be discarded before calling this +# factory.) +def lambda_(defn, environment, symbols, continuation): + formals = t.flatten(t.head(defn)) + body = t.head(t.tail(defn)) + proc = t.Procedure(body, formals, environment, symbols) + return lambda: (continuation, (proc,)) + +# Returns a continuation which takes a value and binds that value to a symbol in +# a specific environment, then chains to a known target continuation. This +# implements evaluation of the `define` special form, once the value is known. +def bind(symbol, environment, continuation): + def bind_(value): + environment.define(symbol, value) + return (continuation, ()) + return bind_ + +# Returns a continuation which takes a value and returns one of two known target +# continuations based on the value. If the value is true (Python truthy), this +# chains to the `on_true` continuation. If the value is not true (Python falsy), +# this chains to the `on_false` continuation. In either case, the continuation +# not chained to is discarded. +def branch(on_true, on_false): + return lambda val: (on_true if val else on_false, ()) + +# Returns a continuation which receives values, and appends them to the values +# passed to this factory, before chaining to a known target continuation. This +# implements intermediate evaluation of list forms, where part of the list is +# already known, as well as splicing for forms that yield multiple values. +def append(args, continuation): + return lambda *tail: (continuation, (*args, *tail)) + +# Transforms a continuation which should receive function results into a +# function call continuation. A function call continuation receives a function +# and a sequence of arguments. If the function is a primitive function, the +# reduction directly calls the function and chains to the wrapped continuation. +# If the function is a procedure, this instead returns a continuation which will +# invoke the procedure, then chain to the wrapped continuation. +def invoke(continuation): + def invoke_(fn, *args): + if isinstance(fn, t.Procedure): + return procedure_call(fn, *args) + return builtin(fn, *args) + + def procedure_call(fn, *args): + call_env = fn.invocation_environment(*args) + call_cont = fn.continuation(call_env, continuation) + return (call_cont, ()) + + def builtin(fn, *args): + result = fn(*args) + return (continuation, result) + return invoke_ + +# ## RECURSIVE CONTINUATIONS +# +# The following continuation factories recurse, producing complex chains of +# continuations. + +# Returns a continuation which, when invoked, evaluates the first step of a form +# and chains to a continuation for the remainder of the form. When the form is +# exhausted, the final continuation chains to a known target continuation. +# +# This is the heart of the continuation-passing transformation. Every valid form +# can be translated into continuation-passing form throught this factory. This +# handles literals, symbols, special forms, and function application. +def eval(value, environment, symbols, continuation): + if t.symbol_p(value): + return symbol(value, environment, continuation) + if not t.list_p(value): + return literal(value, continuation) + # Special forms (all of which begin with a special symbol, discarded here) + if t.head(value) == symbols['if']: + return if_(t.tail(value), environment, symbols, continuation) + if t.head(value) == symbols['define']: + return define(t.tail(value), environment, symbols, continuation) + if t.head(value) == symbols['lambda']: + return lambda_(t.tail(value), environment, symbols, continuation) + # Ran out of alternatives, must be a function application + return apply(value, environment, symbols, invoke(continuation)) + +# Returns a continuation which fully evaluates a `(define symbol expr)` form, +# before chaining to a known target continuation. First, the returned +# continuation evaluates the `expr` (recursively, using `eval` to construct the +# continuation`. The result of this evaluation is chained to a `bind` +# continuation, to store the result of evaluation in the target environment. +# Finally, the `bind` continuation chains to the target continuation. +def define(value, environment, symbols, continuation): + symb, expr = t.flatten(value) + + if not t.symbol_p(symb): + raise RuntimeError("Argument to define not a symbol: {t.display(symb)}") + + bind_cont = bind(symb, environment, continuation) + eval_cont = eval(expr, environment, symbols, bind_cont) + return lambda: (eval_cont, ()) + +# Returns a continuation which fully evaluates an `(if cond if-true if-false)` +# form, before chaining to a known target continuation. First, the returned +# continuation evaluates the `cond` expression (recursively, using `eval` to +# construct the continuation), which chains to a `branch` continuation +# containing continuations for the `if-true` and `if-false` epxressions. The +# `if-true` and `if-false` continuations each chain to the target continuation. +def if_(value, environment, symbols, continuation): + cond, if_true, if_false = t.flatten(value) + + if_true_cont = eval(if_true, environment, symbols, continuation) + if_false_cont = eval(if_false, environment, symbols, continuation) + branch_cont = branch(if_true_cont, if_false_cont) + + return eval(cond, environment, symbols, branch_cont) + +# Returns a continuation which fully evaluates the elements of a list, before +# chaining to a target continuation. If this is applied to an empty list, the +# returned continuation accepts arbitrary arguments and chains to the target +# continuation with those arguments. Otherwise, this evaluates the head of the +# list (recursively, using `eval` to prepare the continuation), then chains to +# an `append` continuation to glue the result onto the result of recursively +# calling `apply` on the tail of the list. +def apply(list, environment, symbols, continuation): + if t.nil_p(list): + return lambda *args: (continuation, args) + tail_cont = apply(t.tail(list), environment, symbols, continuation) + return lambda *args: (eval(t.head(list), environment, symbols, append(args, tail_cont)), ()) diff --git a/actinide/ports.py b/actinide/ports.py index 7748cd7..6f419e5 100644 --- a/actinide/ports.py +++ b/actinide/ports.py @@ -36,17 +36,17 @@ class Port(object): # Read at least 1 and up to ``n`` characters from a port. This consumes them # from the port: they are no longer available to future peeks or reads. ``n`` # must be strictly positive. -def read(port, n): +def read_port(port, n): return port.read(n) # Read all remaining input from a port, consuming it. -def read_fully(port): +def read_port_fully(port): return port.read_fully() # Read at least 1 and up to ``n`` characters from a port, without consuming # them. They will be available on future peeks and reads. ``n`` must be strictly # positive. -def peek(port, n): +def peek_port(port, n): return port.peek(n) # Create an input port from a string. diff --git a/actinide/reader.py b/actinide/reader.py new file mode 100644 index 0000000..06d774c --- /dev/null +++ b/actinide/reader.py @@ -0,0 +1,131 @@ +# ## The Reader +# +# Reads one Actinide form and parses it. + +from decimal import Decimal, InvalidOperation +from .tokenizer import read_token +from .types import * + +# Raised if the reader encounters invalid syntax in the underlying port. +class SyntaxError(Exception): + pass + +# Reads one form from a port. This will consume all of the tokens included in +# the form, but leave trailing data after the final token untouched. +# +# Any symbols read will be resolved using the passed symbol table (from the +# ``symbol_table`` module), to implement interning. +# +# This uses a simple recursive descent algorithm to read forms. If the first +# token is a ``(``, this recursively reads forms until it encounters a trailing +# ``)``, and builds a list (or a dotted pair) out of the forms read. Otherwise, +# this reads the atom found. +def read(port, symbols): + head = read_token(port) + if head is None: + raise SyntaxError("Unexpected end of input") + if head == ')': + raise SyntaxError("Unexpected ')'") + if head == '(': + return read_list(port, symbols) + return read_atom(head, symbols) + +# Reads the body of a list from a port. This will read forms, recursively, until +# it encounters the terminating ``)`` that closes the current list, or until it +# encounters a ``.``, in which case it reads a trailing dotted pair. +# +# Under the hood, this method only reads the first token. If the list continues, +# subsequent atoms are read using read_list_tail, which handles lists and dotted +# pairs. +def read_list(port, symbols): + head = read_token(port) + if head is None: + raise SyntaxError("Unexpected end of input") + if head == ')': + return list() + if head == '(': + return cons( + read_list(port, symbols), + read_list_tail(port, symbols), + ) + return cons( + read_atom(head, symbols), + read_list_tail(port, symbols), + ) + +# Reads the tail of a list from a port, recursively. This will read forms, until +# it encounters either a ``)`` (which terminates the list) or a ``.`` (which +# begins a terminating sequence of dotted pairs). +def read_list_tail(port, symbols): + head = read_token(port) + if head is None: + raise SyntaxError("Unexpected end of input") + if head == ')': + return None + if head == '(': + return cons( + read_list(port, symbols), + read_list_tail(port, symbols), + ) + if head == '.': + return read_cons_head(port, symbols) + return cons( + read_atom(head, symbols), + read_list_tail(port, symbols), + ) + +# Reads the first atom after a dot, then delegates to ``read_cons_tail`` to read +# the second. This generates a dotted pair out of the tail of the input. +def read_cons_head(port, symbols): + head = read_token(port) + if head is None: + raise SyntaxError("Unexpected end of input") + if head == ')': + raise SyntaxError("Unexpected ')'") + if head == '(': + return read_cons_tail( + read_list(port, symbols), + port, + symbols, + ) + return read_cons_tail(read_atom(head, symbols), port, symbols) + +# Reads the second form of a dotted pair from the input. This must either be a +# terminating ``)``, or another dotted pair. +def read_cons_tail(head, port, symbols): + tail = read_token(port) + if tail is None: + raise SyntaxError("Unexpected end of input") + if tail == ')': + return head + if tail == '.': + return cons( + head, + read_cons_head(port, symbols), + ) + raise SyntaxError("Unexpected value after dotted pair") + +# Converts an atom into its lisp representation, using the following priorities: +# +# * ``read_boolean`` +# * ``read_integer`` +# * ``read_decimal`` +# * ``read_symbol`` in the current symbol table (which always succeeds) +# +# The first reader to accept the string determines the type of the result. +def read_atom(atom, symbols): + def read_as_first(val, *funcs): + for func in funcs: + result = func(val) + if result is not None: + return result + + if atom[0] == '"': + return read_string(atom) + return read_as_first( + atom, + read_boolean, + read_integer, + read_decimal, + lambda val: read_symbol(val, symbols), + ) diff --git a/actinide/symbol_table.py b/actinide/symbol_table.py new file mode 100644 index 0000000..ae8cccf --- /dev/null +++ b/actinide/symbol_table.py @@ -0,0 +1,7 @@ +from .types import Symbol + +class SymbolTable(dict): + def __missing__(self, key): + self[key] = result = Symbol(key) + return result + diff --git a/actinide/tokenizer.py b/actinide/tokenizer.py index a69d35d..449fc3d 100644 --- a/actinide/tokenizer.py +++ b/actinide/tokenizer.py @@ -1,4 +1,4 @@ -from .ports import read, peek +from .ports import * # ## TOKENIZATION # @@ -204,7 +204,6 @@ def tokenize_string_character(state): def tokenize_escaped_string_character(state): def tokenize_escaped_string_character_next(port): next = read_next(port) - print(f'Esc: state={repr(state)} next={repr(next)} peek={repr(peek_next(port))}') if next == '': raise TokenError('Unclosed string literal') if next in '\\"': @@ -221,7 +220,7 @@ def tokenize_string_end(state): return tokenize_string_end_next def read_next(port): - return read(port, 1) + return read_port(port, 1) def peek_next(port): - return peek(port, 1) + return peek_port(port, 1) diff --git a/actinide/types.py b/actinide/types.py new file mode 100644 index 0000000..54a0c04 --- /dev/null +++ b/actinide/types.py @@ -0,0 +1,218 @@ +# ## Actinide Types + +from collections import namedtuple +from decimal import Decimal, InvalidOperation + +# Circular import. Hard to avoid: Procedure calls `eval`, `eval` calls +# `lambda_`, `lambda_` eventually calls `Procedure`. We indirect the call +# through the module object to avoid problems with import order. +from . import evaluator as e + +from .environment import * + +# ### Nil +# +# Nil is a type with a single value, usually taken to denote no value. + +nil = None + +def nil_p(value): + return value is None + +def read_nil(value): + return nil + +def display_nil(value): + return '()' + +# ### Booleans +# +# The true and false values. + +true = True +false = False + +def boolean_p(value): + return value in (true, false) + +def read_boolean(value): + if value == '#t': + return true + if value == '#f': + return false + return None + +def display_boolean(value): + return '#t' if value else '#f' + + +# ### Integers +# +# These are fixed-precision numbers with no decimal part, obeying common notions +# of machine integer arithmetic. They support large values. + +integer = int + +def integer_p(value): + return isinstance(value, integer) + +def read_integer(value): + try: + return integer(value) + except ValueError: + return nil + +def display_integer(value): + return str(value) + +# ### Decimals +# +# These are variable-precision numbers, which may have a decimal part. + +decimal = Decimal + +def decimal_p(value): + return isinstance(value, decimal) + +def read_decimal(value): + try: + return decimal(value) + except InvalidOperation: + return nil + +def display_decimal(value): + return str(value) + +# ### Strings +# +# Sequences of characters. + +string = str + +def string_p(value): + return not symbol_p(value) and isinstance(value, string) + +def read_string(value): + value = value[1:-1] + value = value.replace('\\"', '"') + value = value.replace('\\\\', '\\') + return value + +def display_string(value): + value = value.replace('\\', '\\\\') + value = value.replace('"', '\\"') + return f'"{value}"' + +# ### Symbols +# +# Short, interned strings used as identifiers. Interning is handled by a +# SymbolTable. + +class Symbol(object): + def __init__(self, value): + self.value = value + def __hash__(self): + return hash(self.value) + def __str__(self): + return f"{self.value}" + def __repr__(self): + return f'Symbol({repr(self.value)})' + +def symbol(string, symbol_table): + return symbol_table[string] + +def symbol_p(value): + return isinstance(value, Symbol) + +def read_symbol(value, symbol_table): + return symbol(value, symbol_table) + +def display_symbol(value): + return str(value) + +# ### Conses +# +# Pairs. + +Cons = namedtuple('Cons', 'head tail') + +def cons(head, tail): + return Cons(head, tail) + +def cons_p(value): + return isinstance(value, Cons) + +def head(cons): + return cons.head + +def tail(cons): + return cons.tail + +def display_cons(value): + parts = [] + while cons_p(value): + parts.append(display(head(value))) + value = tail(value) + if not nil_p(value): + parts.append('.') + parts.append(display(value)) + return '(' + ' '.join(parts) + ')' + +# ### Lists + +def list(*elems): + if elems: + head, *tail = elems + return cons(head, list(*tail)) + else: + return nil + +def list_p(value): + return nil_p(value) or cons_p(value) and list_p(tail(value)) + +def flatten(list): + r = [] + while not nil_p(list): + r.append(head(list)) + list = tail(list) + return r + +# ### Procedures + +class Procedure(object): + def __init__(self, body, formals, environment, symbols): + self.body = body + self.formals = formals + self.environment = environment + self.symbols = symbols + + def __call__(self, *args): + call_env = self.invocation_environment(*args) + call_cont = self.continuation(call_env, None) + return e.run(call_cont, ()) + + def invocation_environment(self, *args): + return Environment(zip(self.formals, args), self.environment) + + def continuation(self, environment, continuation): + return e.eval(self.body, environment, self.symbols, continuation) + +def procedure_p(value): + return callable(value) + +# ### General-purpose functions +def display(value): + if cons_p(value): + return display_cons(value) + if symbol_p(value): + return display_symbol(value) + if string_p(value): + return display_string(value) + if nil_p(value): + return display_nil(value) + if boolean_p(value): + return display_boolean(value) + if integer_p(value): + return display_integer(value) + if decimal_p(value): + return display_decimal(value) diff --git a/primer.py b/primer.py new file mode 100644 index 0000000..2c6d73a --- /dev/null +++ b/primer.py @@ -0,0 +1,37 @@ +import actinide +import actinide.types as t +import actinide.builtin as b +import actinide.evaluator as e + +session = actinide.Session() +program = session.read(""" +(begin + 1 + 1.0 + "Hello" + (define a + (lambda (b) (values 1 2.2 "three" a b))) + (define pp + (lambda () (pp))) + (print (a "foo")) + (print (eval (list (symbol "a") "bar"))) + (print 0 (values 1 2 3) 4 5) + (pp)) +""") + +def begin(*args): + if args: + return args[-1] + return None + +def values(*args): + return args + +session.bind_builtin(values) +session.bind_void(print) +session.bind_fn(begin) +session.bind_fn(t.list) +session.bind_fn(session.symbol) +session.bind_builtin(session.eval) + +session.eval(program) diff --git a/tests/forms.py b/tests/forms.py new file mode 100644 index 0000000..1a49636 --- /dev/null +++ b/tests/forms.py @@ -0,0 +1,76 @@ +from hypothesis.strategies import integers, decimals as hypo_decimals, booleans, characters, text, tuples, lists as hypo_lists, just, one_of +from hypothesis.strategies import deferred, recursive + +from actinide.symbol_table import * +from actinide.types import * + +# Generators for forms. Where these generators create a symbol, they will use +# the global symbol table defined in this module. Do not do this in your own +# code! A global symbol table is a memory leak, and only the fact that tests +# exit before they can do any substantial damage prevents this from being a +# bigger problem. +# +# Each generator produces the parsed version of a form. + +symbol_table = SymbolTable() + +# Generates nil. +def nils(): + return just(None) + +# Generates integers. +def ints(): + return integers() + +# Generates language decimals. +def decimals(): + return hypo_decimals(allow_nan=False, allow_infinity=False) + +# Generates booleans. +def bools(): + return booleans() + +# Generates strings. +def strings(): + return text() + +# Generates any character legal in a symbol, which cannot be part of some other +# kind of atom. +def symbol_characters(): + return characters(blacklist_characters='01234567890#. \t\n();"') + +# Generates symbols guaranteed not to conflict with other kinds of literal. This +# is a subset of the legal symbols. +def symbols(): + return text(symbol_characters(), min_size=1)\ + .map(lambda item: symbol_table[item]) + +# Generates atoms. +def atoms(): + return one_of( + nils(), + ints(), + decimals(), + bools(), + strings(), + symbols(), + ) + +# Generates arbitrary conses, with varying depth. This may happen to generate +# lists by accident. +def conses(): + return recursive( + tuples(atoms(), atoms()).map(lambda elems: cons(*elems)), + lambda children: tuples(children | atoms(), children | atoms()).map(lambda elems: cons(*elems)), + ) + +# Generates lists, with varying depth. +def lists(): + return recursive( + hypo_lists(atoms()).map(lambda elems: list(*elems)), + lambda children: hypo_lists(children | atoms()).map(lambda elems: list(*elems)), + ) + +# Generates random forms. +def forms(): + return one_of(nils(), ints(), bools(), strings(), symbols(), conses(), lists()) diff --git a/tests/programs.py b/tests/programs.py new file mode 100644 index 0000000..78c52c7 --- /dev/null +++ b/tests/programs.py @@ -0,0 +1,57 @@ +from hypothesis.strategies import integers, decimals, booleans, text, tuples +from hypothesis.strategies import one_of, composite, deferred + +from actinide.types import * +from actinide.environment import * +from actinide.symbol_table import * + +symbol_table = SymbolTable() + +def literals(): + return one_of( + booleans(), + integers(), + decimals(allow_nan=False, allow_infinity=False), + text(), + ).map(lambda value: (value, (value,), [])) + +def symbols(): + return text().map(lambda symb: symbol_table[symb]) + +def values(): + return literals() + +@composite +def ifs(draw, conds, trues, falses): + cond, (cond_result,), cond_bindings = draw(conds) + true, true_result, true_bindings = draw(trues) + false, false_result, false_bindings = draw(falses) + + expr = list(symbol_table['if'], cond, true, false) + result = true_result if cond_result else false_result + bindings = cond_bindings + (true_bindings if cond_result else false_bindings) + + return expr, result, bindings + +def if_exprs(): + return ifs(exprs(), exprs(), exprs()) + +def if_progs(): + return ifs(exprs(), programs(), programs()) + +@composite +def defines(draw): + symbol = draw(symbols()) + value, (value_result,), value_bindings = draw(values()) + return ( + list(symbol_table['define'], symbol, value), + (), + value_bindings + [(symbol, value_result)], + ) + +def exprs(): + return deferred(lambda: one_of(literals(), if_exprs())) + +def programs(): + return deferred(lambda: one_of(literals(), defines(), if_progs())) + diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py new file mode 100644 index 0000000..d989f85 --- /dev/null +++ b/tests/test_evaluator.py @@ -0,0 +1,18 @@ +from hypothesis import given, event + +from actinide.evaluator import * +from actinide.environment import * +from actinide.types import * + +from .programs import * + +# Cases for the evaluator: + +# * Given a program, does it produce the expected evaluation? +@given(programs()) +def test_evaluator(program_result): + program, result, bindings = program_result + environment = Environment() + assert run(eval(program, environment, symbol_table, None)) == result + for symbol, value in bindings: + assert environment[symbol] == value diff --git a/tests/test_ports.py b/tests/test_ports.py index c2d1e06..d755dcf 100644 --- a/tests/test_ports.py +++ b/tests/test_ports.py @@ -6,24 +6,24 @@ from actinide.ports import * @given(text(), integers(min_value=1, max_value=2**32 - 1)) def test_read(input, n): port = string_to_input_port(input) - output = read(port, n) + output = read_port(port, n) assert input.startswith(output) assert (len(output) == 0 and len(input) == 0) != (0 < len(output) <= n) - assert output + read_fully(port) == input + assert output + read_port_fully(port) == input @given(text(), integers(min_value=1, max_value=2**32 - 1)) def test_peek(input, n): port = string_to_input_port(input) - output = peek(port, n) + output = peek_port(port, n) assert input.startswith(output) assert (len(output) == 0 and len(input) == 0) != (0 < len(output) <= n) - assert read_fully(port) == input + assert read_port_fully(port) == input @given(text(), integers(min_value=1, max_value=2**32 - 1)) def test_read_fully(input, n): port = string_to_input_port(input) - output = read_fully(port) + output = read_port_fully(port) assert output == input diff --git a/tests/test_reader.py b/tests/test_reader.py new file mode 100644 index 0000000..54a1681 --- /dev/null +++ b/tests/test_reader.py @@ -0,0 +1,28 @@ +from hypothesis import given +from hypothesis.strategies import text + +from actinide.reader import * +from actinide.ports import * +from actinide.types import * + +from .forms import * + +# Cases for the reader: + +# * Given a form, can the reader recover it from its display? +@given(forms()) +def test_reader(form): + input = display(form) + port = string_to_input_port(input) + + assert read(port, symbol_table) == form + +# * Given a form and some trailing garbage, can the reader recover the form +# without touching the garbage? This is only reliable with lists and conses. +@given(lists() | conses(), text()) +def test_reader_with_trailing(form, text): + input = display(form) + text + port = string_to_input_port(input) + + assert read(port, symbol_table) == form + assert read_port_fully(port) == text diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 5c0ddea..7e5c7b3 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -1,6 +1,4 @@ -from hypothesis import given, settings, HealthCheck, event -from hypothesis.strategies import just, text, characters, from_regex, one_of, tuples, sampled_from -import io +from hypothesis import given from actinide.tokenizer import * from actinide.ports import * |
