author Owen Jacobson <owen@grimoire.ca> 2017-11-08 04:03:25 -0500
committer Owen Jacobson <owen@grimoire.ca> 2017-11-08 04:03:25 -0500
commit e157a7a83d5429bca9d564d931ab041fa96cd277 (patch)
tree 7487556a8acd2746a4dc2084c42ca706f8e063e6 /tests
parent 0fcc2dc618f2eb00d8cf82ce328c98e0ea9f2626 (diff)
Add tests for the individual tokenizer states.
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_tokenizer.py | 193
1 file changed, 193 insertions, 0 deletions
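The tests below all pin down the same contract for the tokenizer's state functions: each tokenize_* state takes the lookahead text accumulated so far plus a readable port, consumes at most one character, and returns a (token, lookahead, next_state) triple. As a rough illustration of how such states compose, here is a sketch inferred from the assertions in the diff; the names tokenize_any and tokenize_eof come from actinide.tokenizer, but the driver loop itself and its termination rule are assumptions, not code from the project:

    # Hypothetical driver over the tested state functions (illustrative only).
    from actinide.tokenizer import tokenize_any, tokenize_eof

    def tokens(port):
        lookahead, state = '', tokenize_any
        while True:
            token, lookahead, state = state(lookahead, port)
            if token is not None:
                yield token
            # tokenize_eof re-enters itself without reading (see
            # test_tokenize_eof), so treat reaching it as end of input.
            if state == tokenize_eof:
                break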
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
new file mode 100644
index 0000000..a300eb2
--- /dev/null
+++ b/tests/test_tokenizer.py
@@ -0,0 +1,193 @@
+from hypothesis import given
+from hypothesis.strategies import text, from_regex
+import io
+
+from actinide.tokenizer import *
+
+class ReadablePort(io.StringIO):
+    def __repr__(self):
+        # Slightly friendlier debugging output
+        return f"ReadablePort(str={repr(self.getvalue())}, pos={self.tell()})"
+
+def not_(f):
+    return lambda *args, **kwargs: not f(*args, **kwargs)
+
+@given(text(), text())
+def test_tokenize_eof(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_eof(s, port)
+
+    assert token is None
+    assert lookahead == s
+    assert next == tokenize_eof
+    assert port.tell() == 0
+
+def comment_continues(text):
+    if text == '':
+        return False
+    return text[0] != '\n'
+
+@given(text(), text().filter(comment_continues))
+def test_tokenize_comment_continues(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_comment(s, port)
+
+    assert token is None
+    assert port.tell() == 1
+    assert lookahead == input[0]
+    assert next == tokenize_comment
+
+@given(text(), text().filter(not_(comment_continues)))
+def test_tokenize_comment_ends(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_comment(s, port)
+
+    assert token is None
+    assert port.tell() == (1 if input else 0)
+    assert lookahead == (input[0] if input else '')
+    assert next == tokenize_any
+
+@given(text(), text())
+def test_tokenize_syntax(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_syntax(s, port)
+
+    assert token == Syntax(s)
+    assert isinstance(token, Syntax)
+    assert port.tell() == (1 if input else 0)
+    assert lookahead == (input[0] if input else '')
+    assert next == tokenize_any
+
+@given(text(), text())
+def test_tokenize_whitespace(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_whitespace(s, port)
+
+    assert token is None
+    assert port.tell() == (1 if input else 0)
+    assert lookahead == (input[0] if input else '')
+    assert next == tokenize_any
+
+def symbol_continues(text):
+    if text == '':
+        return False
+    return text[0] not in ' \n\t();"'
+
+@given(text(), text().filter(symbol_continues))
+def test_tokenize_symbol_continues(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_symbol(s, port)
+
+    assert token is None
+    assert port.tell() == 1
+    assert lookahead == s + input[0]
+    assert next == tokenize_symbol
+
+@given(text(), text().filter(not_(symbol_continues)))
+def test_tokenize_symbol_ends(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_symbol(s, port)
+
+    assert token == Symbol(s)
+    assert isinstance(token, Symbol)
+    assert port.tell() == (1 if input else 0)
+    assert lookahead == (input[0] if input else '')
+    assert next == tokenize_any
+
+def string_continues(text):
+    if text == '':
+        return False
+    return text[0] != '"'
+
+@given(text(), text().filter(string_continues))
+def test_tokenize_string_continues(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_string(s, port)
+
+    assert token is None
+    assert port.tell() == 1
+    assert lookahead == input[0]
+    assert next == tokenize_string_character
+
+@given(text(), text().filter(not_(string_continues)))
+def test_tokenize_string_ends(s, input):
+    port = ReadablePort(input)
+    try:
+        token, lookahead, next = tokenize_string(s, port)
+
+        assert token is None
+        assert port.tell() == 1
+        assert lookahead == ''
+        assert next == tokenize_string_end
+    except TokenError:
+        assert input == ''
+        assert port.tell() == 0
+
+def is_escape(text):
+    if text == '':
+        return False
+    return text[0] == '\\'
+
+@given(text(), text().filter(string_continues).filter(not_(is_escape)))
+def test_tokenize_string_character_continues(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_string_character(s, port)
+
+    assert token is None
+    assert port.tell() == 1
+    assert lookahead == s + input[0]
+    assert next == tokenize_string_character
+
+# Using from_regex() rather than text() because searching randomly for strings
+# that start with a specific character is far, _far_ too slow. (It often fails
+# to find any examples.) I _think_ this preserves the property that this group
+# of three tests is exhaustive, but it's not as obvious as it would be if I
+# could use text() here.
+@given(text(), from_regex(r'\\.*').filter(string_continues).filter(is_escape))
+def test_tokenize_string_character_begins_escape(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_string_character(s, port)
+
+    assert token is None
+    assert port.tell() == 1
+    assert lookahead == s
+    assert next == tokenize_escaped_string_character
+
+@given(text(), text().filter(not_(string_continues)))
+def test_tokenize_string_character_ends(s, input):
+    port = ReadablePort(input)
+    try:
+        token, lookahead, next = tokenize_string_character(s, port)
+
+        assert token is None
+        assert port.tell() == 1
+        assert lookahead == s
+        assert next == tokenize_string_end
+    except TokenError:
+        assert input == ''
+        assert port.tell() == 0
+
+@given(text(), text())
+def test_tokenize_escaped_string_character(s, input):
+    port = ReadablePort(input)
+    try:
+        token, lookahead, next = tokenize_escaped_string_character(s, port)
+
+        assert token is None
+        assert port.tell() == 1
+        assert lookahead == s + input[0]
+        assert next == tokenize_string_character
+    except TokenError:
+        assert input == '' or input[0] not in '\\n'
+        assert port.tell() == (1 if input else 0)
+
+@given(text(), text())
+def test_tokenize_string_end(s, input):
+    port = ReadablePort(input)
+    token, lookahead, next = tokenize_string_end(s, port)
+
+    assert token == s
+    assert isinstance(token, String)
+    assert port.tell() == (1 if input else 0)
+    assert lookahead == (input[0] if input else '')
+    assert next == tokenize_any
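As a concrete reading of the symbol properties above, driving tokenize_symbol by hand over a short input would proceed like this. This is a worked example of the behaviour the assertions encode, re-using the ReadablePort helper from the tests; it assumes the same contract and is not captured output from the module:

    port = ReadablePort('bc)')
    token, lookahead, state = tokenize_symbol('a', port)        # consumes 'b'
    assert token is None and lookahead == 'ab' and state == tokenize_symbol
    token, lookahead, state = tokenize_symbol(lookahead, port)  # consumes 'c'
    assert token is None and lookahead == 'abc' and state == tokenize_symbol
    token, lookahead, state = tokenize_symbol(lookahead, port)  # consumes ')', a delimiter
    assert token == Symbol('abc') and lookahead == ')' and state == tokenize_any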