Skip to content

Commit

Permalink
Tokenize quoted-and-escaped strings and comments
Browse files Browse the repository at this point in the history
  • Loading branch information
drdnar committed Jan 22, 2015
1 parent ea2171f commit 670c4a1
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 40 deletions.
2 changes: 2 additions & 0 deletions eZasm/Assembler/Token.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public enum TokenClass
Operator,
IndentWhitespace,
NewLineWhitespace,
QuotedString,
Comment,
}

public readonly TokenClass Type;
Expand Down
26 changes: 20 additions & 6 deletions eZasm/Assembler/Tokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,20 @@ public class Tokenizer
/// <summary>
/// String containing a regex that describes what an operator looks like
/// </summary>
internal readonly static string Operators = "!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\[\\]\\\\{\\}\\<\\>,\\|\\-\\+=`~/:;\"'";
internal readonly static string Operators = "!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\[\\]\\\\{\\}\\<\\>,\\.\\|\\-\\+=`~/:\"'";
/// <summary>
/// String containing a regex that describes what whitespace looks like
/// </summary>
internal readonly static string WhitespaceChars = " \\t";
internal readonly static string NewLineChars = "(\\n|\\r|\\n\\r)";
internal readonly static string NewLineChars = "(\\r\\n|\\n|\\r)";
/// <summary>
/// String containing a regex that describes what a quoted string looks like:
/// an opening double quote, then zero or more items that are each either a single
/// character other than backslash, double quote, carriage return or line feed, or
/// a backslash escape (\", \n, \t, \r or \\), then a closing double quote.
/// </summary>
internal readonly static string QuotedStringChars = "\"([^\\\\\"\r\n]|(\\\\[\"ntr\\\\]))*\"";
/// <summary>
/// String containing a regex that describes what a comment looks like:
/// a semicolon followed by every character up to, but not including,
/// the next carriage return or line feed.
/// </summary>
internal readonly static string CommentChars = ";[^\\r\\n]*";
// ("[^"
/// <summary>
/// Regex that describes what a token looks like
/// </summary>
internal readonly static Regex TokenRegex = new Regex("([^" + Operators + WhitespaceChars + "]+|[" + Operators + "]|[" + WhitespaceChars + "]+|" + NewLineChars + "+)");
//internal readonly static Regex TokenRegex = new Regex("(|" + QuotedStringChars + "|[^" + Operators + WhitespaceChars + "\\n\\r]+|[" + Operators + "]|[" + WhitespaceChars + "]+|" + NewLineChars + ")");
internal readonly static Regex TokenRegex = new Regex("(" + QuotedStringChars + "|" + CommentChars + "|[^" + Operators + WhitespaceChars + "\\n\\r]+|[" + Operators + "]|[" + WhitespaceChars + "]+|" + NewLineChars + ")");
/// <summary>
/// Regex that describes what an operator looks like
/// </summary>
Expand All @@ -38,7 +42,14 @@ public class Tokenizer
/// Regex that describes what a new line looks like
/// </summary>
internal readonly static Regex IsNewLineToken = new Regex(NewLineChars);

/// <summary>
/// Regex that describes what a quoted string looks like, built from <see cref="QuotedStringChars"/>.
/// The pattern is not anchored, so <c>IsMatch</c> succeeds for any text that
/// contains a quoted string, not only text that consists solely of one.
/// </summary>
internal readonly static Regex IsQuotedStringToken = new Regex(QuotedStringChars);
/// <summary>
/// Regex that describes what a comment looks like, built from <see cref="CommentChars"/>.
/// The pattern is not anchored, so <c>IsMatch</c> succeeds for any text that
/// contains a semicolon, not only text that begins with one.
/// </summary>
internal readonly static Regex IsCommentToken = new Regex(CommentChars);

/// <summary>
/// Text to parse
Expand Down Expand Up @@ -75,15 +86,18 @@ public Token GetNextToken()
{
if (TokensLeft == 0)
return null;
if (!FirstToken)
MatchesEnumerator.MoveNext();
MatchesEnumerator.MoveNext();
FirstToken = false;
CurrentMatch = ((Match)MatchesEnumerator.Current);
CurrentString = CurrentMatch.Value;
TokensLeft--;
Token token = null;
if (IsOperatorToken.IsMatch(CurrentString))
token = new Token(Token.TokenClass.Operator, CurrentString, InputFile, LineNumber);
else if (IsCommentToken.IsMatch(CurrentString))
token = new Token(Token.TokenClass.Comment, CurrentString, InputFile, LineNumber);
else if (IsQuotedStringToken.IsMatch(CurrentString))
token = new Token(Token.TokenClass.QuotedString, CurrentString, InputFile, LineNumber);
else if (IsNewLineToken.IsMatch(CurrentString))
token = new Token(Token.TokenClass.NewLineWhitespace, CurrentString, InputFile, LineNumber++);
else if (IsWhitespaceToken.IsMatch(CurrentString))
Expand Down
70 changes: 36 additions & 34 deletions eZasm/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,46 @@ class Program
{
static void Main(string[] args)
{
string operators = "!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\[\\]\\\\{\\}\\<\\>,\\|\\-\\+=`~/:;\"'";
string whitespace = " \\r\\n\\t\\f";
Regex nextToken = new Regex("([^" + operators + whitespace + "]+|[" + operators + "]|[" + whitespace + "]+)");
Regex isOperatorToken = new Regex("[" + operators + "]");
Regex isWhitespaceToken = new Regex("[" + whitespace + "]");
Regex isIndent = new Regex("[ \t]+");
InputFile blah = new InputFile();
blah.Text = ".org 9D95h\r\n\tld hl, hello\r\n\tb_call(_PutS);Show something\r\n\tb_call(_NewLine)\r\n\tret\r\n; String to show\r\nhello: .db \"Hello; \\\"world\\\\!\\\"\", 0";
Tokenizer tokenizer = new Tokenizer(blah);

string[] text = new string[]
{
".org 9D95h",
"\tld hl, hello",
"\tb_call(_PutS)",
"\tb_call(_NewLine)",
"; Thingy",
"hello: .db \"Hello, world!\", 0"
};

bool moreTokens = true;

while (moreTokens)
{
return;
}
Console.WriteLine("***BEGIN INPUT***");

string input = "\tld a, (hl) \\ ld (1234h), a\r\n djnz $";
foreach (Match m in nextToken.Matches(input))
Console.WriteLine(blah.Text);
Console.WriteLine("***END INPUT***");
Console.WriteLine("Tokens:");
while (tokenizer.HasMoreTokens)
{
if (isOperatorToken.IsMatch(m.Value))
Console.Write("Operator: ");
else if (isIndent.IsMatch(m.Value))
Console.Write("Indent: ");
else if (isWhitespaceToken.IsMatch(m.Value))
Console.Write("White space: ");
else
Console.Write("Token: ");
//Console.Write(">");
Console.WriteLine(m.Value);
Token x = tokenizer.GetNextToken();
switch (x.Type)
{
case Token.TokenClass.IndentWhitespace:
Console.Write("Indent: ");
break;
case Token.TokenClass.NewLineWhitespace:
Console.WriteLine("Newline.");
break;
case Token.TokenClass.Operator:
Console.Write("Operator: ");
break;
case Token.TokenClass.Symbol:
Console.Write("Symbol: ");
break;
case Token.TokenClass.Comment:
Console.Write("Comment: ");
break;
case Token.TokenClass.QuotedString:
Console.Write("Quoted string: ");
break;
}
if (x.Type != Token.TokenClass.NewLineWhitespace)
{
Console.Write(x.Text);
Console.WriteLine("");
}
}
Console.WriteLine("TODO: MAKE UNIT TESTS TOMORROW");
Console.ReadKey();
}
}
Expand Down

0 comments on commit 670c4a1

Please sign in to comment.