Big-ass coding binge presents: a Lisp.

This implements a continuation-passing interpreter, which means we get tail calls ferfree. I stopped short of implementing call/cc, because I don't think we need it, but we can get there if we have to.
author: Owen Jacobson <owen@grimoire.ca> 2017-11-13 01:00:33 -0500
committer: Owen Jacobson <owen@grimoire.ca> 2017-11-13 01:01:34 -0500
commit: 5cc96a0fb06fa7d86563f4cb64e5fa9d4f6a09f9 (patch)
tree: 40052668ffe030452b45e3a2d6be8d8fc24acdee
parent: 6a635660bd7b47238642d4a552782687352555ac (diff)
16 files changed, 892 insertions, 15 deletions
diff --git a/actinide/__init__.py b/actinide/__init__.py
index e69de29..afa27d8 100644
--- a/actinide/__init__.py
+++ b/actinide/__init__.py
@@ -0,0 +1,41 @@
+from . import builtin, ports, symbol_table, reader, evaluator, types
+
+class Session(object):
+    def __init__(self):
+        self.symbols = symbol_table.SymbolTable()
+        self.environment = evaluator.Environment()
+
+    def read(self, port):
+        if types.string_p(port):
+            port = ports.string_to_input_port(port)
+        return reader.read(port, self.symbols)
+
+    def eval(self, form):
+        cps = evaluator.eval(form, self.environment, self.symbols, None)
+        return evaluator.run(cps)
+
+    def bind(self, symb, value):
+        self.environment[self.symbol(symb)] = value
+
+    def bind_void(self, fn):
+        return self.bind_builtin(builtin.wrap_void(fn))
+
+    def bind_fn(self, fn):
+        return self.bind_builtin(builtin.wrap_fn(fn))
+
+    def bind_builtin(self, fn):
+        name = builtin.lisp_name(fn)
+        if name is None:
+            raise ValueError("Anonymous functions must be bound using `bind`")
+        symb = self.symbol(name)
+        self.bind(symb, fn)
+        return symb
+
+    def get(self, symb):
+        symb = self.symbol(symb)
+        return self.environment.find(symb)
+
+    def symbol(self, symb):
+        if types.string_p(symb):
+            symb = types.symbol(symb, self.symbols)
+        return symb
diff --git a/actinide/builtin.py b/actinide/builtin.py
new file mode 100644
index 0000000..06623b3
--- /dev/null
+++ b/actinide/builtin.py
@@ -0,0 +1,44 @@
+# ## Tools for binding Python functions into a lisp environment.
+
+from functools import wraps
+
+# Derives the lisp name of a python function or method handle, as follows:
+#
+# * A trailing '_p' in the Python name is converted into a question mark in the
+#   lisp name.
+#
+# * Any remaining underscores in the Python name are converted to dashes in the
+#   lisp name.
+#
+# This only applies to named functions. Python lambdas and non-function
+# callables do not have names.
+def lisp_name(fn):
+    name = fn.__name__
+    if name == '<lambda>':
+        return None
+
+    # Trailing _p is a predicate, translate to ?
+    if name.endswith('_p'):
+        name = name[:-2] + '?'
+
+    # Remaining underscores are dashes
+    name = name.replace('_', '-')
+
+    return name
+
+# Wraps a function which returns no values in a function which returns an empty
+# tuple.
+def wrap_void(fn):
+    @wraps(fn)
+    def wrapper(*args):
+        fn(*args)
+        return ()
+    return wrapper
+
+# Wraps a function which returns one value in a function which returns a
+# one-valued tuple.
+def wrap_fn(fn):
+    @wraps(fn)
+    def wrapper(*args):
+        return fn(*args),
+    return wrapper
diff --git a/actinide/environment.py b/actinide/environment.py
new file mode 100644
index 0000000..29399fe
--- /dev/null
+++ b/actinide/environment.py
@@ -0,0 +1,30 @@
+# Raised if a binding cannot be found.
+class BindingError(Exception):
+    pass
+
+# A lookup table binding symbols to values. This may have a parent environment,
+# in which case lookups will look through to the parent environment if they are
+# not found in the current environment. This allows nested scopes.
+class Environment(dict):
+    # Creates an environment, optionally setting initial values and optionally
+    # adding a parent to look in for values not found in this environment.
+    def __init__(self, bindings=(), parent=None):
+        self.parent = parent
+        self.update(bindings)
+
+    # Look up a binding in this environment, or in any parent environment.
+    # Unlike ``[]``, this will continue into any parent environments, raising an
+    # exception if the name cannot be found in any of them.
+    #
+    # The value from the innermost environment containing the name will be
+    # returned.
+    def find(self, name):
+        if name in self:
+            return self[name]
+        if self.parent != None:
+            return self.parent.find(name)
+        raise BindingError(f'Variable {name} not bound')
+
+    # Sets a value in the current environment.
+    def define(self, name, value):
+        self[name] = value
diff --git a/actinide/evaluator.py b/actinide/evaluator.py
new file mode 100644
index 0000000..49587e2
--- /dev/null
+++ b/actinide/evaluator.py
@@ -0,0 +1,193 @@
+from .environment import *
+
+# Circular import. Hard to avoid: `eval` calls `lambda_`, `lambda_` eventually
+# calls `Procedure`, and `Procedure` calls `eval`. We indirect the call through
+# the module object to avoid problems with import order.
+from . import types as t
+
+# ## EVALUATION
+#
+# The following system implements a continuation-passing interpreter for
+# Actinide. It is heavily inspired by the redex/continuation system described in
+# <https://docs.racket-lang.org/reference/eval-model.html> for the Racket
+# language.
+#
+# The call stack of an Actinide program is implemented as a chain of
+# continuation functions, which grows and shrinks as evaluation proceeds.
+# Because the evaluator itself is tail-recursive, tail calls are free in
+# Actinide - they do not cause the call stack to grow.
+#
+# In principle, this could expose ``call/cc``, but the need for that primitive
+# hasn't come up.
+#
+# Evaluating a form in this system requires both an environment (from the
+# ``environment`` module) and a symbol table (from the ``symbol_table`` module).
+# Callers are encouraged to use the ``Session`` system exposed in the root of
+# this package, instead of calling the invoker directly: a session manages these
+# resources automaticallty. The more intrepid user can manually compile an
+# arbitrary expression using the ``eval`` continuation factory, described below.
+#
+# If the interpreter encounters an error during evaluation, the resulting Python
+# exception bubbles up out of the interpreter and destroys the state of the
+# computation.
+
+# Reduce a continuation to its final value.
+#
+# This iteratively calls the current continuation with the current arguments
+# until the current continuation is None, then exits, returning the final
+# arguments.
+#
+# This trampoline exists to deal with the absence of tail call optimizations in
+# Python. By returning the next continuation rather than invoking it, we avoid
+# growing the Python call stack without bound.
+#
+# The result of evaluating a continuation is always a Python tuple. For
+# expressions, this tuple contains the value(s) produced by the expression. For
+# forms which do not produce a value, this returns the empty tuple.
+def run(continuation, args=()):
+    while continuation is not None:
+        continuation, args = continuation(*args)
+    return args
+
+# ## FLAT CONTINUATIONS
+#
+# These continuation factories and transformers produce continuations which
+# receive already-evaluated values, and which produce evaluated results.
+
+# Returns a continuation which yields a single value, verbatim, and chains to a
+# known target continuation. This implements evaluation for literals.
+def literal(value, continuation):
+    return lambda: (continuation, (value,))
+
+# Returns a continuation which looks up a symbol in an environment, yields the
+# result, and chains to a known target continuation. This implements evaluation
+# for variable lookups.
+def symbol(symb, environment, continuation):
+    return lambda: (continuation, (environment.find(symb),))
+
+# Returns a continuation which yields a newly-created procedure, and chains to a
+# known target continuation. This implements evaluation for the tail of a lambda
+# form. (The head of the lambda form must be discarded before calling this
+# factory.)
+def lambda_(defn, environment, symbols, continuation):
+    formals = t.flatten(t.head(defn))
+    body = t.head(t.tail(defn))
+    proc = t.Procedure(body, formals, environment, symbols)
+    return lambda: (continuation, (proc,))
+
+# Returns a continuation which takes a value and binds that value to a symbol in
+# a specific environment, then chains to a known target continuation. This
+# implements evaluation of the `define` special form, once the value is known.
+def bind(symbol, environment, continuation):
+    def bind_(value):
+        environment.define(symbol, value)
+        return (continuation, ())
+    return bind_
+
+# Returns a continuation which takes a value and returns one of two known target
+# continuations based on the value. If the value is true (Python truthy), this
+# chains to the `on_true` continuation. If the value is not true (Python falsy),
+# this chains to the `on_false` continuation. In either case, the continuation
+# not chained to is discarded.
+def branch(on_true, on_false):
+    return lambda val: (on_true if val else on_false, ())
+
+# Returns a continuation which receives values, and appends them to the values
+# passed to this factory, before chaining to a known target continuation. This
+# implements intermediate evaluation of list forms, where part of the list is
+# already known, as well as splicing for forms that yield multiple values.
+def append(args, continuation):
+    return lambda *tail: (continuation, (*args, *tail))
+
+# Transforms a continuation which should receive function results into a
+# function call continuation. A function call continuation receives a function
+# and a sequence of arguments. If the function is a primitive function, the
+# reduction directly calls the function and chains to the wrapped continuation.
+# If the function is a procedure, this instead returns a continuation which will
+# invoke the procedure, then chain to the wrapped continuation.
+def invoke(continuation):
+    def invoke_(fn, *args):
+        if isinstance(fn, t.Procedure):
+            return procedure_call(fn, *args)
+        return builtin(fn, *args)
+
+    def procedure_call(fn, *args):
+        call_env = fn.invocation_environment(*args)
+        call_cont = fn.continuation(call_env, continuation)
+        return (call_cont, ())
+
+    def builtin(fn, *args):
+        result = fn(*args)
+        return (continuation, result)
+    return invoke_
+
+# ## RECURSIVE CONTINUATIONS
+#
+# The following continuation factories recurse, producing complex chains of
+# continuations.
+
+# Returns a continuation which, when invoked, evaluates the first step of a form
+# and chains to a continuation for the remainder of the form. When the form is
+# exhausted, the final continuation chains to a known target continuation.
+#
+# This is the heart of the continuation-passing transformation. Every valid form
+# can be translated into continuation-passing form throught this factory. This
+# handles literals, symbols, special forms, and function application.
+def eval(value, environment, symbols, continuation):
+    if t.symbol_p(value):
+        return symbol(value, environment, continuation)
+    if not t.list_p(value):
+        return literal(value, continuation)
+    # Special forms (all of which begin with a special symbol, discarded here)
+    if t.head(value) == symbols['if']:
+        return if_(t.tail(value), environment, symbols, continuation)
+    if t.head(value) == symbols['define']:
+        return define(t.tail(value), environment, symbols, continuation)
+    if t.head(value) == symbols['lambda']:
+        return lambda_(t.tail(value), environment, symbols, continuation)
+    # Ran out of alternatives, must be a function application
+    return apply(value, environment, symbols, invoke(continuation))
+
+# Returns a continuation which fully evaluates a `(define symbol expr)` form,
+# before chaining to a known target continuation. First, the returned
+# continuation evaluates the `expr` (recursively, using `eval` to construct the
+# continuation`. The result of this evaluation is chained to a `bind`
+# continuation, to store the result of evaluation in the target environment.
+# Finally, the `bind` continuation chains to the target continuation.
+def define(value, environment, symbols, continuation):
+    symb, expr = t.flatten(value)
+
+    if not t.symbol_p(symb):
+        raise RuntimeError("Argument to define not a symbol: {t.display(symb)}")
+
+    bind_cont = bind(symb, environment, continuation)
+    eval_cont = eval(expr, environment, symbols, bind_cont)
+    return lambda: (eval_cont, ())
+
+# Returns a continuation which fully evaluates an `(if cond if-true if-false)`
+# form, before chaining to a known target continuation. First, the returned
+# continuation evaluates the `cond` expression (recursively, using `eval` to
+# construct the continuation), which chains to a `branch` continuation
+# containing continuations for the `if-true` and `if-false` epxressions. The
+# `if-true` and `if-false` continuations each chain to the target continuation.
+def if_(value, environment, symbols, continuation):
+    cond, if_true, if_false = t.flatten(value)
+
+    if_true_cont = eval(if_true, environment, symbols, continuation)
+    if_false_cont = eval(if_false, environment, symbols, continuation)
+    branch_cont = branch(if_true_cont, if_false_cont)
+
+    return eval(cond, environment, symbols, branch_cont)
+
+# Returns a continuation which fully evaluates the elements of a list, before
+# chaining to a target continuation. If this is applied to an empty list, the
+# returned continuation accepts arbitrary arguments and chains to the target
+# continuation with those arguments. Otherwise, this evaluates the head of the
+# list (recursively, using `eval` to prepare the continuation), then chains to
+# an `append` continuation to glue the result onto the result of recursively
+# calling `apply` on the tail of the list.
+def apply(list, environment, symbols, continuation):
+    if t.nil_p(list):
+        return lambda *args: (continuation, args)
+    tail_cont = apply(t.tail(list), environment, symbols, continuation)
+    return lambda *args: (eval(t.head(list), environment, symbols, append(args, tail_cont)), ())
diff --git a/actinide/ports.py b/actinide/ports.py
index 7748cd7..6f419e5 100644
--- a/actinide/ports.py
+++ b/actinide/ports.py
@@ -36,17 +36,17 @@ class Port(object):
 # Read at least 1 and up to ``n`` characters from a port. This consumes them
 # from the port: they are no longer available to future peeks or reads. ``n``
 # must be strictly positive.
-def read(port, n):
+def read_port(port, n):
     return port.read(n)
 
 # Read all remaining input from a port, consuming it.
-def read_fully(port):
+def read_port_fully(port):
     return port.read_fully()
 
 # Read at least 1 and up to ``n`` characters from a port, without consuming
 # them. They will be available on future peeks and reads. ``n`` must be strictly
 # positive.
-def peek(port, n):
+def peek_port(port, n):
     return port.peek(n)
 
 # Create an input port from a string.
diff --git a/actinide/reader.py b/actinide/reader.py
new file mode 100644
index 0000000..06d774c
--- /dev/null
+++ b/actinide/reader.py
@@ -0,0 +1,131 @@
+# ## The Reader
+#
+# Reads one Actinide form and parses it.
+
+from decimal import Decimal, InvalidOperation
+from .tokenizer import read_token
+from .types import *
+
+# Raised if the reader encounters invalid syntax in the underlying port.
+class SyntaxError(Exception):
+    pass
+
+# Reads one form from a port. This will consume all of the tokens included in
+# the form, but leave trailing data after the final token untouched.
+#
+# Any symbols read will be resolved using the passed symbol table (from the
+# ``symbol_table`` module), to implement interning.
+#
+# This uses a simple recursive descent algorithm to read forms. If the first
+# token is a ``(``, this recursively reads forms until it encounters a trailing
+# ``)``, and builds a list (or a dotted pair) out of the forms read. Otherwise,
+# this reads the atom found.
+def read(port, symbols):
+    head = read_token(port)
+    if head is None:
+        raise SyntaxError("Unexpected end of input")
+    if head == ')':
+        raise SyntaxError("Unexpected ')'")
+    if head == '(':
+        return read_list(port, symbols)
+    return read_atom(head, symbols)
+
+# Reads the body of a list from a port. This will read forms, recursively, until
+# it encounters the terminating ``)`` that closes the current list, or until it
+# encounters a ``.``, in which case it reads a trailing dotted pair.
+#
+# Under the hood, this method only reads the first token. If the list continues,
+# subsequent atoms are read using read_list_tail, which handles lists and dotted
+# pairs.
+def read_list(port, symbols):
+    head = read_token(port)
+    if head is None:
+        raise SyntaxError("Unexpected end of input")
+    if head == ')':
+        return list()
+    if head == '(':
+        return cons(
+            read_list(port, symbols),
+            read_list_tail(port, symbols),
+        )
+    return cons(
+        read_atom(head, symbols),
+        read_list_tail(port, symbols),
+    )
+
+# Reads the tail of a list from a port, recursively. This will read forms, until
+# it encounters either a ``)`` (which terminates the list) or a ``.`` (which
+# begins a terminating sequence of dotted pairs).
+def read_list_tail(port, symbols):
+    head = read_token(port)
+    if head is None:
+        raise SyntaxError("Unexpected end of input")
+    if head == ')':
+        return None
+    if head == '(':
+        return cons(
+            read_list(port, symbols),
+            read_list_tail(port, symbols),
+        )
+    if head == '.':
+        return read_cons_head(port, symbols)
+    return cons(
+        read_atom(head, symbols),
+        read_list_tail(port, symbols),
+    )
+
+# Reads the first atom after a dot, then delegates to ``read_cons_tail`` to read
+# the second. This generates a dotted pair out of the tail of the input.
+def read_cons_head(port, symbols):
+    head = read_token(port)
+    if head is None:
+        raise SyntaxError("Unexpected end of input")
+    if head == ')':
+        raise SyntaxError("Unexpected ')'")
+    if head == '(':
+        return read_cons_tail(
+            read_list(port, symbols),
+            port,
+            symbols,
+        )
+    return read_cons_tail(read_atom(head, symbols), port, symbols)
+
+# Reads the second form of a dotted pair from the input. This must either be a
+# terminating ``)``, or another dotted pair.
+def read_cons_tail(head, port, symbols):
+    tail = read_token(port)
+    if tail is None:
+        raise SyntaxError("Unexpected end of input")
+    if tail == ')':
+        return head
+    if tail == '.':
+        return cons(
+            head,
+            read_cons_head(port, symbols),
+        )
+    raise SyntaxError("Unexpected value after dotted pair")
+
+# Converts an atom into its lisp representation, using the following priorities:
+#
+# * ``read_boolean``
+# * ``read_integer``
+# * ``read_decimal``
+# * ``read_symbol`` in the current symbol table (which always succeeds)
+#
+# The first reader to accept the string determines the type of the result.
+def read_atom(atom, symbols):
+    def read_as_first(val, *funcs):
+        for func in funcs:
+            result = func(val)
+            if result is not None:
+                return result
+
+    if atom[0] == '"':
+        return read_string(atom)
+    return read_as_first(
+        atom,
+        read_boolean,
+        read_integer,
+        read_decimal,
+        lambda val: read_symbol(val, symbols),
+    )
diff --git a/actinide/symbol_table.py b/actinide/symbol_table.py
new file mode 100644
index 0000000..ae8cccf
--- /dev/null
+++ b/actinide/symbol_table.py
@@ -0,0 +1,7 @@
+from .types import Symbol
+
+class SymbolTable(dict):
+    def __missing__(self, key):
+        self[key] = result = Symbol(key)
+        return result
+
diff --git a/actinide/tokenizer.py b/actinide/tokenizer.py
index a69d35d..449fc3d 100644
--- a/actinide/tokenizer.py
+++ b/actinide/tokenizer.py
@@ -1,4 +1,4 @@
-from .ports import read, peek
+from .ports import *
 
 # ## TOKENIZATION
 #
@@ -204,7 +204,6 @@ def tokenize_string_character(state):
 def tokenize_escaped_string_character(state):
     def tokenize_escaped_string_character_next(port):
         next = read_next(port)
-        print(f'Esc: state={repr(state)} next={repr(next)} peek={repr(peek_next(port))}')
         if next == '':
             raise TokenError('Unclosed string literal')
         if next in '\\"':
@@ -221,7 +220,7 @@ def tokenize_string_end(state):
     return tokenize_string_end_next
 
 def read_next(port):
-    return read(port, 1)
+    return read_port(port, 1)
 
 def peek_next(port):
-    return peek(port, 1)
+    return peek_port(port, 1)
diff --git a/actinide/types.py b/actinide/types.py
new file mode 100644
index 0000000..54a0c04
--- /dev/null
+++ b/actinide/types.py
@@ -0,0 +1,218 @@
+# ## Actinide Types
+
+from collections import namedtuple
+from decimal import Decimal, InvalidOperation
+
+# Circular import. Hard to avoid: Procedure calls `eval`, `eval` calls
+# `lambda_`, `lambda_` eventually calls `Procedure`. We indirect the call
+# through the module object to avoid problems with import order.
+from . import evaluator as e
+
+from .environment import *
+
+# ### Nil
+#
+# Nil is a type with a single value, usually taken to denote no value.
+
+nil = None
+
+def nil_p(value):
+    return value is None
+
+def read_nil(value):
+    return nil
+
+def display_nil(value):
+    return '()'
+
+# ### Booleans
+#
+# The true and false values.
+
+true = True
+false = False
+
+def boolean_p(value):
+    return value in (true, false)
+
+def read_boolean(value):
+    if value == '#t':
+        return true
+    if value == '#f':
+        return false
+    return None
+
+def display_boolean(value):
+    return '#t' if value else '#f'
+
+
+# ### Integers
+#
+# These are fixed-precision numbers with no decimal part, obeying common notions
+# of machine integer arithmetic. They support large values.
+
+integer = int
+
+def integer_p(value):
+    return isinstance(value, integer)
+
+def read_integer(value):
+    try:
+        return integer(value)
+    except ValueError:
+        return nil
+
+def display_integer(value):
+    return str(value)
+
+# ### Decimals
+#
+# These are variable-precision numbers, which may have a decimal part.
+
+decimal = Decimal
+
+def decimal_p(value):
+    return isinstance(value, decimal)
+
+def read_decimal(value):
+    try:
+        return decimal(value)
+    except InvalidOperation:
+        return nil
+
+def display_decimal(value):
+    return str(value)
+
+# ### Strings
+#
+# Sequences of characters.
+
+string = str
+
+def string_p(value):
+    return not symbol_p(value) and isinstance(value, string)
+
+def read_string(value):
+    value = value[1:-1]
+    value = value.replace('\\"', '"')
+    value = value.replace('\\\\', '\\')
+    return value
+
+def display_string(value):
+    value = value.replace('\\', '\\\\')
+    value = value.replace('"', '\\"')
+    return f'"{value}"'
+
+# ### Symbols
+#
+# Short, interned strings used as identifiers. Interning is handled by a
+# SymbolTable.
+
+class Symbol(object):
+    def __init__(self, value):
+        self.value = value
+    def __hash__(self):
+        return hash(self.value)
+    def __str__(self):
+        return f"{self.value}"
+    def __repr__(self):
+        return f'Symbol({repr(self.value)})'
+
+def symbol(string, symbol_table):
+    return symbol_table[string]
+
+def symbol_p(value):
+    return isinstance(value, Symbol)
+
+def read_symbol(value, symbol_table):
+    return symbol(value, symbol_table)
+
+def display_symbol(value):
+    return str(value)
+
+# ### Conses
+#
+# Pairs.
+
+Cons = namedtuple('Cons', 'head tail')
+
+def cons(head, tail):
+    return Cons(head, tail)
+
+def cons_p(value):
+    return isinstance(value, Cons)
+
+def head(cons):
+    return cons.head
+
+def tail(cons):
+    return cons.tail
+
+def display_cons(value):
+    parts = []
+    while cons_p(value):
+        parts.append(display(head(value)))
+        value = tail(value)
+    if not nil_p(value):
+        parts.append('.')
+        parts.append(display(value))
+    return '(' + ' '.join(parts) + ')'
+
+# ### Lists
+
+def list(*elems):
+    if elems:
+        head, *tail = elems
+        return cons(head, list(*tail))
+    else:
+        return nil
+
+def list_p(value):
+    return nil_p(value) or cons_p(value) and list_p(tail(value))
+
+def flatten(list):
+    r = []
+    while not nil_p(list):
+        r.append(head(list))
+        list = tail(list)
+    return r
+
+# ### Procedures
+
+class Procedure(object):
+    def __init__(self, body, formals, environment, symbols):
+        self.body = body
+        self.formals = formals
+        self.environment = environment
+        self.symbols = symbols
+
+    def __call__(self, *args):
+        call_env = self.invocation_environment(*args)
+        call_cont = self.continuation(call_env, None)
+        return e.run(call_cont, ())
+
+    def invocation_environment(self, *args):
+        return Environment(zip(self.formals, args), self.environment)
+
+    def continuation(self, environment, continuation):
+        return e.eval(self.body, environment, self.symbols, continuation)
+
+def procedure_p(value):
+    return callable(value)
+
+# ### General-purpose functions
+def display(value):
+    if cons_p(value):
+        return display_cons(value)
+    if symbol_p(value):
+        return display_symbol(value)
+    if string_p(value):
+        return display_string(value)
+    if nil_p(value):
+        return display_nil(value)
+    if boolean_p(value):
+        return display_boolean(value)
+    if integer_p(value):
+        return display_integer(value)
+    if decimal_p(value):
+        return display_decimal(value)
diff --git a/primer.py b/primer.py
new file mode 100644
index 0000000..2c6d73a
--- /dev/null
+++ b/primer.py
@@ -0,0 +1,37 @@
+import actinide
+import actinide.types as t
+import actinide.builtin as b
+import actinide.evaluator as e
+
+session = actinide.Session()
+program = session.read("""
+(begin
+    1
+    1.0
+    "Hello"
+    (define a
+        (lambda (b) (values 1 2.2 "three" a b)))
+    (define pp
+        (lambda () (pp)))
+    (print (a "foo"))
+    (print (eval (list (symbol "a") "bar")))
+    (print 0 (values 1 2 3) 4 5)
+    (pp))
+""")
+
+def begin(*args):
+    if args:
+        return args[-1]
+    return None
+
+def values(*args):
+    return args
+
+session.bind_builtin(values)
+session.bind_void(print)
+session.bind_fn(begin)
+session.bind_fn(t.list)
+session.bind_fn(session.symbol)
+session.bind_builtin(session.eval)
+
+session.eval(program)
diff --git a/tests/forms.py b/tests/forms.py
new file mode 100644
index 0000000..1a49636
--- /dev/null
+++ b/tests/forms.py
@@ -0,0 +1,76 @@
+from hypothesis.strategies import integers, decimals as hypo_decimals, booleans, characters, text, tuples, lists as hypo_lists, just, one_of
+from hypothesis.strategies import deferred, recursive
+
+from actinide.symbol_table import *
+from actinide.types import *
+
+# Generators for forms. Where these generators create a symbol, they will use
+# the global symbol table defined in this module. Do not do this in your own
+# code! A global symbol table is a memory leak, and only the fact that tests
+# exit before they can do any substantial damage prevents this from being a
+# bigger problem.
+#
+# Each generator produces the parsed version of a form.
+
+symbol_table = SymbolTable()
+
+# Generates nil.
+def nils():
+    return just(None)
+
+# Generates integers.
+def ints():
+    return integers()
+
+# Generates language decimals.
+def decimals():
+    return hypo_decimals(allow_nan=False, allow_infinity=False)
+
+# Generates booleans.
+def bools():
+    return booleans()
+
+# Generates strings.
+def strings():
+    return text()
+
+# Generates any character legal in a symbol, which cannot be part of some other
+# kind of atom.
+def symbol_characters():
+    return characters(blacklist_characters='01234567890#. \t\n();"')
+
+# Generates symbols guaranteed not to conflict with other kinds of literal. This
+# is a subset of the legal symbols.
+def symbols():
+    return text(symbol_characters(), min_size=1)\
+        .map(lambda item: symbol_table[item])
+
+# Generates atoms.
+def atoms():
+    return one_of(
+        nils(),
+        ints(),
+        decimals(),
+        bools(),
+        strings(),
+        symbols(),
+    )
+
+# Generates arbitrary conses, with varying depth. This may happen to generate
+# lists by accident.
+def conses():
+    return recursive(
+        tuples(atoms(), atoms()).map(lambda elems: cons(*elems)),
+        lambda children: tuples(children | atoms(), children | atoms()).map(lambda elems: cons(*elems)),
+    )
+
+# Generates lists, with varying depth.
+def lists():
+    return recursive(
+        hypo_lists(atoms()).map(lambda elems: list(*elems)),
+        lambda children: hypo_lists(children | atoms()).map(lambda elems: list(*elems)),
+    )
+
+# Generates random forms.
+def forms():
+    return one_of(nils(), ints(), bools(), strings(), symbols(), conses(), lists())
diff --git a/tests/programs.py b/tests/programs.py
new file mode 100644
index 0000000..78c52c7
--- /dev/null
+++ b/tests/programs.py
@@ -0,0 +1,57 @@
+from hypothesis.strategies import integers, decimals, booleans, text, tuples
+from hypothesis.strategies import one_of, composite, deferred
+
+from actinide.types import *
+from actinide.environment import *
+from actinide.symbol_table import *
+
+symbol_table = SymbolTable()
+
+def literals():
+    return one_of(
+        booleans(),
+        integers(),
+        decimals(allow_nan=False, allow_infinity=False),
+        text(),
+    ).map(lambda value: (value, (value,), []))
+
+def symbols():
+    return text().map(lambda symb: symbol_table[symb])
+
+def values():
+    return literals()
+
+@composite
+def ifs(draw, conds, trues, falses):
+    cond, (cond_result,), cond_bindings = draw(conds)
+    true, true_result, true_bindings = draw(trues)
+    false, false_result, false_bindings = draw(falses)
+
+    expr = list(symbol_table['if'], cond, true, false)
+    result = true_result if cond_result else false_result
+    bindings = cond_bindings + (true_bindings if cond_result else false_bindings)
+
+    return expr, result, bindings
+
+def if_exprs():
+    return ifs(exprs(), exprs(), exprs())
+
+def if_progs():
+    return ifs(exprs(), programs(), programs())
+
+@composite
+def defines(draw):
+    symbol = draw(symbols())
+    value, (value_result,), value_bindings = draw(values())
+    return (
+        list(symbol_table['define'], symbol, value),
+        (),
+        value_bindings + [(symbol, value_result)],
+    )
+
+def exprs():
+    return deferred(lambda: one_of(literals(), if_exprs()))
+
+def programs():
+    return deferred(lambda: one_of(literals(), defines(), if_progs()))
+
diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py
new file mode 100644
index 0000000..d989f85
--- /dev/null
+++ b/tests/test_evaluator.py
@@ -0,0 +1,18 @@
+from hypothesis import given, event
+
+from actinide.evaluator import *
+from actinide.environment import *
+from actinide.types import *
+
+from .programs import *
+
+# Cases for the evaluator:
+
+# * Given a program, does it produce the expected evaluation?
+@given(programs())
+def test_evaluator(program_result):
+    program, result, bindings = program_result
+    environment = Environment()
+    assert run(eval(program, environment, symbol_table, None)) == result
+    for symbol, value in bindings:
+        assert environment[symbol] == value
diff --git a/tests/test_ports.py b/tests/test_ports.py
index c2d1e06..d755dcf 100644
--- a/tests/test_ports.py
+++ b/tests/test_ports.py
@@ -6,24 +6,24 @@ from actinide.ports import *
 @given(text(), integers(min_value=1, max_value=2**32 - 1))
 def test_read(input, n):
     port = string_to_input_port(input)
-    output = read(port, n)
+    output = read_port(port, n)
 
     assert input.startswith(output)
     assert (len(output) == 0 and len(input) == 0) != (0 < len(output) <= n)
-    assert output + read_fully(port) == input
+    assert output + read_port_fully(port) == input
 
 @given(text(), integers(min_value=1, max_value=2**32 - 1))
 def test_peek(input, n):
     port = string_to_input_port(input)
-    output = peek(port, n)
+    output = peek_port(port, n)
 
     assert input.startswith(output)
     assert (len(output) == 0 and len(input) == 0) != (0 < len(output) <= n)
-    assert read_fully(port) == input
+    assert read_port_fully(port) == input
 
 @given(text(), integers(min_value=1, max_value=2**32 - 1))
 def test_read_fully(input, n):
     port = string_to_input_port(input)
-    output = read_fully(port)
+    output = read_port_fully(port)
 
     assert output == input
diff --git a/tests/test_reader.py b/tests/test_reader.py
new file mode 100644
index 0000000..54a1681
--- /dev/null
+++ b/tests/test_reader.py
@@ -0,0 +1,28 @@
+from hypothesis import given
+from hypothesis.strategies import text
+
+from actinide.reader import *
+from actinide.ports import *
+from actinide.types import *
+
+from .forms import *
+
+# Cases for the reader:
+
+# * Given a form, can the reader recover it from its display?
+@given(forms())
+def test_reader(form):
+    input = display(form)
+    port = string_to_input_port(input)
+
+    assert read(port, symbol_table) == form
+
+# * Given a form and some trailing garbage, can the reader recover the form
+#   without touching the garbage? This is only reliable with lists and conses.
+@given(lists() | conses(), text())
+def test_reader_with_trailing(form, text):
+    input = display(form) + text
+    port = string_to_input_port(input)
+
+    assert read(port, symbol_table) == form
+    assert read_port_fully(port) == text
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
index 5c0ddea..7e5c7b3 100644
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@@ -1,6 +1,4 @@
-from hypothesis import given, settings, HealthCheck, event
-from hypothesis.strategies import just, text, characters, from_regex, one_of, tuples, sampled_from
-import io
+from hypothesis import given
 
 from actinide.tokenizer import *
 from actinide.ports import *
author	Owen Jacobson <owen@grimoire.ca>	2017-11-13 01:00:33 -0500
committer	Owen Jacobson <owen@grimoire.ca>	2017-11-13 01:01:34 -0500
commit	5cc96a0fb06fa7d86563f4cb64e5fa9d4f6a09f9 (patch)
tree	40052668ffe030452b45e3a2d6be8d8fc24acdee
parent	6a635660bd7b47238642d4a552782687352555ac (diff)