Skip to content

Commit

Permalink
Cleaning up tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
miroiu committed Aug 25, 2021
1 parent 0cf073c commit da5466d
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 61 deletions.
10 changes: 10 additions & 0 deletions StringMath/Tokenizer/ITokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
namespace StringMath
{
/// <summary>
/// Contract for tokenizers: a source of <see cref="Token"/> values that callers
/// obtain one at a time by calling <see cref="ReadToken"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the interface itself does not say how end of input is reported;
/// presumably implementations return a token of type <c>TokenType.EndOfCode</c> - confirm.
/// </remarks>
internal interface ITokenizer
{
/// <summary>Reads the next token in the token stream.</summary>
/// <returns>The <see cref="Token"/> that was read.</returns>
Token ReadToken();
}
}
26 changes: 22 additions & 4 deletions StringMath/Tokenizer/Token.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
namespace StringMath
{
internal struct Token
/// <summary>A token containing basic information about some text.</summary>
internal readonly struct Token
{
public int Position;
public string Text;
public TokenType Type;
/// <summary>Creates a token from its type, its text and its position.</summary>
/// <param name="type">The kind of token being created.</param>
/// <param name="text">The text that makes up the token.</param>
/// <param name="position">The position in the input string associated with the token.</param>
public Token(TokenType type, string text, int position)
{
    // The three fields are independent; each one is assigned exactly once here.
    this.Position = position;
    this.Text = text;
    this.Type = type;
}

/// <summary>The token's position in the input string.</summary>
public readonly int Position;

/// <summary>The token value.</summary>
public readonly string Text;

/// <summary>The token type.</summary>
public readonly TokenType Type;

/// <inheritdoc />
public override string ToString()
{
return $"{Text} ({Type}):{Position}";
Expand Down
3 changes: 3 additions & 0 deletions StringMath/Tokenizer/TokenType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
/// <summary>Available token types.</summary>
internal enum TokenType
{
/// <summary>Unknown token.</summary>
Unknown,

/// <summary>\0</summary>
EndOfCode,

Expand Down
42 changes: 19 additions & 23 deletions StringMath/Tokenizer/Tokenizer.Helpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,8 @@ private string ReadIdentifier(ISourceText stream)
}
else
{
throw LangException.UnexpectedToken(new Token
{
Position = stream.Position,
Text = stream.Current.ToString()
}, TokenType.Identifier);
Token token = new Token(TokenType.Unknown, stream.Current.ToString(), stream.Position);
throw LangException.UnexpectedToken(token, TokenType.Identifier);
}

while (stream.Current != identifierTerminator)
Expand All @@ -34,23 +31,22 @@ private string ReadIdentifier(ISourceText stream)
}
else
{
throw LangException.UnexpectedToken(new Token
{
Position = stream.Position,
Text = stream.Current.ToString()
}, identifierTerminator);
Token token = new Token(TokenType.Unknown, stream.Current.ToString(), stream.Position);
throw LangException.UnexpectedToken(token, identifierTerminator);
}
}

builder.Append(stream.Current);
stream.MoveNext();
string text = builder.ToString();

return text.Length == 2 ? throw LangException.UnexpectedToken(new Token
if (text.Length == 2)
{
Position = stream.Position - 1,
Text = identifierTerminator.ToString()
}, identifierTerminator) : text;
Token token = new Token(TokenType.Unknown, identifierTerminator.ToString(), stream.Position - 1);
throw LangException.UnexpectedToken(token, identifierTerminator);
}

return text;
}

private string ReadOperator(ISourceText stream)
Expand Down Expand Up @@ -84,11 +80,8 @@ private string ReadNumber(ISourceText stream)
}
else
{
throw LangException.UnexpectedToken(new Token
{
Position = stream.Position,
Text = stream.Current.ToString()
}, TokenType.Number);
Token token = new Token(TokenType.Unknown, stream.Current.ToString(), stream.Position);
throw LangException.UnexpectedToken(token, TokenType.Number);
}
}
else if (char.IsDigit(stream.Current))
Expand All @@ -103,11 +96,14 @@ private string ReadNumber(ISourceText stream)
}

char peeked = stream.Peek(-1);
return peeked == '.' ? throw LangException.UnexpectedToken(new Token

if (peeked == '.')
{
Position = stream.Position,
Text = peeked.ToString()
}, TokenType.Number) : builder.ToString();
Token token = new Token(TokenType.Unknown, peeked.ToString(), stream.Position);
throw LangException.UnexpectedToken(token, TokenType.Number);
}

return builder.ToString();
}

private void ReadWhiteSpace(ISourceText stream)
Expand Down
42 changes: 8 additions & 34 deletions StringMath/Tokenizer/Tokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@

namespace StringMath
{
/// <summary>
/// Contract for tokenizers: a source of <see cref="Token"/> values that callers
/// obtain one at a time by calling <see cref="ReadToken"/>.
/// </summary>
internal interface ITokenizer
{
/// <summary>Reads the next token in the token stream.</summary>
/// <returns>The <see cref="Token"/> that was read.</returns>
Token ReadToken();
}

/// <inheritdoc />
internal sealed partial class Tokenizer : ITokenizer
{
Expand All @@ -27,7 +19,7 @@ public Tokenizer(ISourceText text)
{
_text = text;
}

/// <summary>Creates a new instance of the tokenizer.</summary>
/// <param name="text">The text to tokenize.</param>
public Tokenizer(string text) : this(new SourceText(text))
Expand All @@ -37,12 +29,6 @@ public Tokenizer(string text) : this(new SourceText(text))
/// <inheritdoc />
public Token ReadToken()
{
Token token = new Token
{
Text = $"{_text.Current}",
Position = _text.Position
};

switch (_text.Current)
{
case '.':
Expand All @@ -56,29 +42,22 @@ public Token ReadToken()
case '7':
case '8':
case '9':
token.Type = TokenType.Number;
token.Text = ReadNumber(_text);
break;
return new Token(TokenType.Number, ReadNumber(_text), _text.Position);

case '(':
token.Type = TokenType.OpenParen;
_text.MoveNext();
break;
return new Token(TokenType.OpenParen, "(", _text.Position);

case ')':
token.Type = TokenType.CloseParen;
_text.MoveNext();
break;
return new Token(TokenType.CloseParen, ")", _text.Position);

case '{':
token.Type = TokenType.Identifier;
token.Text = ReadIdentifier(_text);
break;
return new Token(TokenType.Identifier, ReadIdentifier(_text), _text.Position);

case '!':
token.Type = TokenType.Exclamation;
_text.MoveNext();
break;
return new Token(TokenType.Exclamation, "!", _text.Position);

case ' ':
case '\t':
Expand All @@ -91,17 +70,12 @@ public Token ReadToken()
return ReadToken();

case '\0':
token.Type = TokenType.EndOfCode;
break;
return new Token(TokenType.EndOfCode, "\0", _text.Position);

default:
string op = ReadOperator(_text);
token.Text = op;
token.Type = TokenType.Operator;
break;
return new Token(TokenType.Operator, op, _text.Position);
}

return token;
}

/// <inheritdoc />
Expand Down

0 comments on commit da5466d

Please sign in to comment.