From c5a73d4d6180b42896814f4afdc2278503686756 Mon Sep 17 00:00:00 2001
From: Petr Ohlidal <petr@ohlidal.cz>
Date: Wed, 19 Oct 2022 01:06:53 +0200
Subject: [PATCH] unitok V2 - major cleanup.

---
 source/main/utils/GenericFileFormat.cpp | 702 +++++++++++++++++-------
 source/main/utils/GenericFileFormat.h   |   2 +-
 2 files changed, 491 insertions(+), 213 deletions(-)
diff --git a/source/main/utils/GenericFileFormat.cpp b/source/main/utils/GenericFileFormat.cpp
index 6169a0a40f..c57241f443 100644
--- a/source/main/utils/GenericFileFormat.cpp
+++ b/source/main/utils/GenericFileFormat.cpp
@@ -27,309 +27,587 @@
 using namespace RoR;
 using namespace Ogre;
 
-static void BeginToken(Document& doc, const BitMask_t options, std::vector<char>& tok, RoR::TokenType& tok_type, bool& tok_number_dot, bool& tok_string_naked, Ogre::DataStreamPtr datastream, size_t line_num, char c)
+enum class PartialToken // Kind of token the parser is currently accumulating (tokenizer state)
 {
-    if (c == ';' || (c == '/' && (options & Document::OPTION_ALLOW_SLASH_COMMENTS)))
+    NONE,              // No token in progress
+    COMMENT_SEMICOLON, // Comment starting with ';'
+    COMMENT_SLASH,     // Comment starting with '/' (needs OPTION_ALLOW_SLASH_COMMENTS); further leading '/' are skipped
+    STRING_QUOTED,     // String starting/ending with '"'
+    STRING_NAKED,      // String without '"' on either end
+    NUMBER,            // Number with digits and optionally leading '-'
+    NUMBER_DOT,        // Like NUMBER but already containing '.'
+    KEYWORD,           // Unquoted string at the start of line
+    BOOL_TRUE,         // Partial 'true'
+    BOOL_FALSE,        // Partial 'false'
+    GARBAGE,           // Text not fitting any above category, will be discarded
+};
+
+struct DocumentParser // Character-by-character tokenizer state used by Document::Load()
+{
+    DocumentParser(Document& d, const BitMask_t opt, Ogre::DataStreamPtr ds)
+        : doc(d), options(opt), datastream(ds) {}
+
+    // Config
+    Document& doc;                  // Receives the produced tokens and string pool entries
+    const BitMask_t options;        // Bit flags tested against Document::OPTION_* values
+    Ogre::DataStreamPtr datastream; // Its getName() is used in console messages
+
+    // State
+    std::vector<char> tok;          // Characters of the token being accumulated
+    size_t line_num = 0;            // Starts at 0, incremented on each '\n'; reported in messages
+    size_t line_pos = 0;            // Incremented per consumed character, reset to 0 on '\n'; reported in messages
+    PartialToken partial_tok_type = PartialToken::NONE; // Selects which handler below receives the next character
+
+    void BeginToken(const char c);    // Handles PartialToken::NONE
+    void UpdateComment(const char c); // Handles COMMENT_SEMICOLON and COMMENT_SLASH
+    void UpdateString(const char c);  // Handles STRING_QUOTED and STRING_NAKED
+    void UpdateNumber(const char c);  // Handles NUMBER and NUMBER_DOT
+    void UpdateBool(const char c);    // Handles BOOL_TRUE and BOOL_FALSE
+    void UpdateKeyword(const char c); // Handles KEYWORD
+    void UpdateGarbage(const char c); // Handles GARBAGE
+};
+
+void DocumentParser::BeginToken(const char c) // Handles `c` while no token is in progress: skips separators, emits LINEBREAK, or starts a new partial token.
+{
+    switch (c)
     {
-        if (doc.tokens.size() == 0 || doc.tokens.back().type == TokenType::LINEBREAK)
-            tok_type = TokenType::COMMENT;
+    case '\r': // Carriage return is ignored
+        break;
+
+    case ' ':
+    case ',':
+    case '\t': // Separators between tokens
+        line_pos++;
+        break;
+
+    case '\n':
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+        break;
+
+    case ';':
+        partial_tok_type = PartialToken::COMMENT_SEMICOLON;
+        line_pos++;
+        break;
+
+    case '/':
+        if (options & Document::OPTION_ALLOW_SLASH_COMMENTS)
+        {
+            partial_tok_type = PartialToken::COMMENT_SLASH;
+        }
+        else if (options & Document::OPTION_ALLOW_NAKED_STRINGS &&
+            (doc.tokens.size() != 0 && doc.tokens.back().type != TokenType::LINEBREAK)) // not first on line?
+        {
+            tok.push_back(c);
+            partial_tok_type = PartialToken::STRING_NAKED;
+        }
         else
-            App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-                fmt::format("{}, line {}: ignoring stray character '{}'", datastream->getName(), line_num, c));
-    }
-    else if (c == '"')
-    {
-        tok_type = TokenType::STRING;
-    }
-    else if (c == '.')
-    {
+        {
+            partial_tok_type = PartialToken::GARBAGE;
+            tok.push_back(c);
+        }
+        line_pos++;
+        break;
+
+    case '"': // The opening quote itself is not stored
+        partial_tok_type = PartialToken::STRING_QUOTED;
+        line_pos++;
+        break;
+
+    case '.':
         tok.push_back(c);
-        tok_type = TokenType::NUMBER;
-        tok_number_dot = true;
-    }
-    else if (isdigit(c) || c == '-')
-    {
+        partial_tok_type = PartialToken::NUMBER_DOT;
+        line_pos++;
+        break;
+
+    case '-':
         tok.push_back(c);
-        tok_type = TokenType::NUMBER;
-    }
-    else if (c == 't' || c == 'f')
-    {
+        partial_tok_type = PartialToken::NUMBER;
+        line_pos++;
+        break;
+
+    case 't': // NOTE(review): also taken at line start, so a word beginning with 't' or 'f' never becomes a KEYWORD
         tok.push_back(c);
-        tok_type = TokenType::BOOL;
-    }
-    else if (isalpha(c))
-    {
-        if (doc.tokens.size() == 0 || doc.tokens.back().type == TokenType::LINEBREAK)
+        partial_tok_type = PartialToken::BOOL_TRUE;
+        line_pos++;
+        break;
+
+    case 'f':
+        tok.push_back(c);
+        partial_tok_type = PartialToken::BOOL_FALSE;
+        line_pos++;
+        break;
+
+    default:
+        if (isdigit(static_cast<unsigned char>(c))) // cast: <cctype> functions are undefined for negative char values
         {
             tok.push_back(c);
-            tok_type = TokenType::KEYWORD;
+            partial_tok_type = PartialToken::NUMBER;
         }
-        else if (BITMASK_IS_1(options, Document::OPTION_ALLOW_NAKED_STRINGS))
+        else if (isalpha(static_cast<unsigned char>(c)) &&
+            (doc.tokens.size() == 0 || doc.tokens.back().type == TokenType::LINEBREAK)) // on line start?
         {
             tok.push_back(c);
-            tok_type = TokenType::STRING;
-            tok_string_naked = true;
+            partial_tok_type = PartialToken::KEYWORD;
+        }
+        else if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+        {
+            tok.push_back(c);
+            partial_tok_type = PartialToken::STRING_NAKED;
         }
         else
         {
-            App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-                fmt::format("{}, line {}: ignoring stray character '{}'", datastream->getName(), line_num, c));
+            partial_tok_type = PartialToken::GARBAGE;
+            tok.push_back(c);
         }
+        line_pos++;
+        break;
     }
-    else
+
+    if (partial_tok_type == PartialToken::GARBAGE)
     {
         App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-            fmt::format("{}, line {}: ignoring stray character '{}'", datastream->getName(), line_num, c));
+            fmt::format("{}, line {}, pos {}: stray character '{}'", datastream->getName(), line_num, line_pos, c));
     }
 }
 
-static void FlushToken(Document& doc, std::vector<char>& tok, RoR::TokenType& tok_type, bool& tok_number_dot, bool& tok_string_naked, Ogre::DataStreamPtr datastream, size_t line_num)
+void DocumentParser::UpdateComment(const char c) // Accumulates comment text until '\n', then emits a COMMENT token followed by a LINEBREAK token.
 {
-    if (tok.size() > 0)
+    switch (c)
     {
-        if (tok.back() != '\0')
-            tok.push_back('\0');
-
-        switch (tok_type)
+    case '\r': // Carriage return is ignored
+        break;
+
+    case '\n':
+        // Flush comment: the token value is the offset of its text in the string pool
+        doc.tokens.push_back({ TokenType::COMMENT, (float)doc.string_pool.size() });
+        tok.push_back('\0');
+        std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        // Break line
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+        break;
+
+    case '/':
+        if (partial_tok_type != PartialToken::COMMENT_SLASH || tok.size() > 0) // With COMMENT_SLASH, skip any number of leading '/'
         {
-        case TokenType::STRING:
-        case TokenType::COMMENT:
-        case TokenType::KEYWORD:
-            doc.tokens.push_back({ tok_type, (float)doc.string_pool.size() });
-            std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
-            break;
-
-        case TokenType::BOOL:
-            if (!std::strcmp(tok.data(), "true"))
-                doc.tokens.push_back({ tok_type, 1.f });
-            else if (!std::strcmp(tok.data(), "false"))
-                doc.tokens.push_back({ tok_type, 0.f });
-            else
-                App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-                    fmt::format("{}, line {}: ignoring garbage token '{}'", datastream->getName(), line_num, tok.data()));
-            break;
-
-        case TokenType::NUMBER:
-            doc.tokens.push_back({ tok_type, (float)Ogre::StringConverter::parseReal(tok.data()) });
-            break;
+            tok.push_back(c);
         }
-        tok.clear();
-    }
+        line_pos++;
+        break;
 
-    tok_type = TokenType::NONE;
-    tok_number_dot = false;
-    tok_string_naked = false;
+    default: // Everything else, separators included, is part of the comment text
+        tok.push_back(c);
+        line_pos++;
+        break;
+    }
 }
 
-static void ProcessNumber(Document& doc, std::vector<char>& tok, RoR::TokenType& tok_type, bool& tok_number_dot, bool& tok_string_naked, Ogre::DataStreamPtr datastream, size_t line_num, char c)
+void DocumentParser::UpdateString(const char c) // Accumulates a quoted or naked string and emits a STRING token when it ends.
 {
-    if (c == ' ' || c == ',' || c == '\t')
-    {
-        FlushToken(doc, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-    }
-    else if (c == '.')
+    switch (c)
     {
-        if (!tok_number_dot)
+    case '\r': // Carriage return is ignored
+        break;
+
+    case ' ':
+    case ',':
+    case '\t':
+        if (partial_tok_type == PartialToken::STRING_QUOTED)
         {
-            tok.push_back(c);
-            tok_number_dot = true;
+            tok.push_back(c); // Separators are literal text inside quotes (pushing '\0' here truncated the string)
         }
-        else
+        else // (partial_tok_type == PartialToken::STRING_NAKED)
         {
+            // Flush string: the token value is the offset of its text in the string pool
+            doc.tokens.push_back({ TokenType::STRING, (float)doc.string_pool.size() });
             tok.push_back('\0');
+            std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
+            tok.clear();
+            partial_tok_type = PartialToken::NONE;
+        }
+        line_pos++;
+        break;
+
+    case '\n': // Strings cannot be multiline; an unclosed quoted string is accepted with a warning
+        if (partial_tok_type == PartialToken::STRING_QUOTED)
+        {
             App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-                fmt::format("{}, line {}: duplicate '.' in number, parsing as '{}'", datastream->getName(), line_num, c, tok.data()));
-            FlushToken(doc, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
+                fmt::format("{}, line {}, pos {}: quoted string interrupted by newline", datastream->getName(), line_num, line_pos));
         }
-    }
-    else if (isdigit(c))
-    {
+        // Flush string
+        doc.tokens.push_back({ TokenType::STRING, (float)doc.string_pool.size() });
+        tok.push_back('\0');
+        std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        // Break line
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+        break;
+
+    case '"':
+        if (partial_tok_type == PartialToken::STRING_QUOTED)
+        {
+            // Flush string (the closing quote is not stored)
+            doc.tokens.push_back({ TokenType::STRING, (float)doc.string_pool.size() });
+            tok.push_back('\0');
+            std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
+            tok.clear();
+            partial_tok_type = PartialToken::NONE;
+        }
+        else // (partial_tok_type == PartialToken::STRING_NAKED)
+        {
+            partial_tok_type = PartialToken::GARBAGE;
+            tok.push_back(c);
+        }
+        line_pos++;
+        break;
+
+    default:
         tok.push_back(c);
+        line_pos++;
+        break;
     }
-    else
+
+    if (partial_tok_type == PartialToken::GARBAGE)
     {
         App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-            fmt::format("{}, line {}: stray character '{}' in number, parsing as '{}'", datastream->getName(), line_num, c, tok.data()));
-        FlushToken(doc, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
+            fmt::format("{}, line {}, pos {}: stray character '{}' in string", datastream->getName(), line_num, line_pos, c));
     }
 }
 
-static void ProcessBool(Document& doc, const BitMask_t options, std::vector<char>& tok, RoR::TokenType& tok_type, bool& tok_number_dot, bool& tok_string_naked, Ogre::DataStreamPtr datastream, size_t line_num, char c)
+void DocumentParser::UpdateNumber(const char c) // Accumulates the text of a number and emits a NUMBER token on separator or newline.
 {
-    // Note: t (true) / f (false) are handled by BeginToken()
-    bool valid = false;
-    bool flush = false;
-    if (tok[0] == 't')
+    switch (c)
     {
-        valid = (
-            (tok.size() == 1 && c == 'r') ||
-            (tok.size() == 2 && c == 'u') ||
-            (tok.size() == 3 && c == 'e'));
-        flush = valid && tok.size() == 3;
+    case '\r': // Carriage return is ignored
+        break;
+
+    case ' ':
+    case ',':
+    case '\t':
+        // Flush number
+        tok.push_back('\0');
+        doc.tokens.push_back({ TokenType::NUMBER, (float)Ogre::StringConverter::parseReal(tok.data()) });
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        line_pos++;
+        break;
+
+    case '\n':
+        // Flush number
+        tok.push_back('\0');
+        doc.tokens.push_back({ TokenType::NUMBER, (float)Ogre::StringConverter::parseReal(tok.data()) });
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        // Break line
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+        break;
+
+    case '.':
+        if (partial_tok_type == PartialToken::NUMBER)
+        {
+            tok.push_back(c);
+            partial_tok_type = PartialToken::NUMBER_DOT;
+        }
+        else // (partial_tok_type == PartialToken::NUMBER_DOT)
+        {
+            partial_tok_type = PartialToken::GARBAGE;
+            tok.push_back(c);
+        }
+        line_pos++;
+        break;
+
+    default: // Digits extend the number (they were silently dropped before); anything else, e.g. a second '-', is garbage
+        if (!isdigit(static_cast<unsigned char>(c))) { partial_tok_type = PartialToken::GARBAGE; }
+        tok.push_back(c);
+        line_pos++;
+        break;
     }
-    else // (tok[0] == 'f')
+
+    if (partial_tok_type == PartialToken::GARBAGE)
     {
-        valid = (
-            (tok.size() == 1 && c == 'a') ||
-            (tok.size() == 2 && c == 'l') ||
-            (tok.size() == 3 && c == 's') ||
-            (tok.size() == 4 && c == 'e'));
-        flush = valid && tok.size() == 4;
+        App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
+            fmt::format("{}, line {}, pos {}: stray character '{}' in number", datastream->getName(), line_num, line_pos, c));
     }
+}
 
-    if (valid)
+void DocumentParser::UpdateBool(const char c) // Matches 'true'/'false' letter by letter; on mismatch the token becomes a naked string (if allowed) or garbage.
+{
+    switch (c)
     {
+    case '\r': // Carriage return is ignored
+        break;
+
+    case ' ':
+    case ',':
+    case '\t':
+        // Discard token
+        tok.push_back('\0');
+        App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
+            fmt::format("{}, line {}, pos {}: discarding incomplete boolean token '{}'", datastream->getName(), line_num, line_pos, tok.data()));
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        line_pos++;
+        break;
+
+    case '\n':
+        // Discard token
+        tok.push_back('\0');
+        App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
+            fmt::format("{}, line {}, pos {}: discarding incomplete boolean token '{}'", datastream->getName(), line_num, line_pos, tok.data()));
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        // Break line
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+        break;
+
+    case 'r': // Valid only as the 2nd letter of 'true'
+        if (partial_tok_type != PartialToken::BOOL_TRUE || tok.size() != 1)
+        {
+            if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+                partial_tok_type = PartialToken::STRING_NAKED;
+            else
+                partial_tok_type = PartialToken::GARBAGE;
+        }
         tok.push_back(c);
-        if (flush)
-            FlushToken(doc, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
+        line_pos++;
+        break;
+
+    case 'u': // Valid only as the 3rd letter of 'true'
+        if (partial_tok_type != PartialToken::BOOL_TRUE || tok.size() != 2)
+        {
+            if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+                partial_tok_type = PartialToken::STRING_NAKED;
+            else
+                partial_tok_type = PartialToken::GARBAGE;
+        }
+        tok.push_back(c);
+        line_pos++;
+        break;
+
+    case 'a': // Valid only as the 2nd letter of 'false'
+        if (partial_tok_type != PartialToken::BOOL_FALSE || tok.size() != 1)
+        {
+            if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+                partial_tok_type = PartialToken::STRING_NAKED;
+            else
+                partial_tok_type = PartialToken::GARBAGE;
+        }
+        tok.push_back(c);
+        line_pos++;
+        break;
+
+    case 'l': // Valid only as the 3rd letter of 'false'
+        if (partial_tok_type != PartialToken::BOOL_FALSE || tok.size() != 2)
+        {
+            if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+                partial_tok_type = PartialToken::STRING_NAKED;
+            else
+                partial_tok_type = PartialToken::GARBAGE;
+        }
+        tok.push_back(c);
+        line_pos++;
+        break;
+
+    case 's': // Valid only as the 4th letter of 'false'
+        if (partial_tok_type != PartialToken::BOOL_FALSE || tok.size() != 3)
+        {
+            if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+                partial_tok_type = PartialToken::STRING_NAKED;
+            else
+                partial_tok_type = PartialToken::GARBAGE;
+        }
+        tok.push_back(c);
+        line_pos++;
+        break;
+
+    case 'e': // Completes the literal only when BOTH the kind and the length match ('||' accepted e.g. "te" and "fale")
+        if (partial_tok_type == PartialToken::BOOL_TRUE && tok.size() == 3)
+        {
+            doc.tokens.push_back({ TokenType::BOOL, 1.f });
+            tok.clear();
+            partial_tok_type = PartialToken::NONE;
+        }
+        else if (partial_tok_type == PartialToken::BOOL_FALSE && tok.size() == 4)
+        {
+            doc.tokens.push_back({ TokenType::BOOL, 0.f });
+            tok.clear();
+            partial_tok_type = PartialToken::NONE;
+        }
+        else
+        {
+            if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+                partial_tok_type = PartialToken::STRING_NAKED;
+            else
+                partial_tok_type = PartialToken::GARBAGE;
+            tok.push_back(c);
+        }
+        line_pos++;
+        break;
+
+    default:
+        // Any other character means this is not a boolean after all
+        partial_tok_type = (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+            ? PartialToken::STRING_NAKED : PartialToken::GARBAGE;
+        tok.push_back(c);
+        line_pos++; // Was missing: every other consuming branch advances the reported position
+        break;
     }
-    else if (doc.tokens.size() == 0 || doc.tokens.back().type == TokenType::LINEBREAK)
+
+    if (partial_tok_type == PartialToken::GARBAGE)
     {
-        tok.push_back(c);
-        tok_type = TokenType::KEYWORD;
+        App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
+            fmt::format("{}, line {}, pos {}: stray character '{}' in boolean", datastream->getName(), line_num, line_pos, c));
     }
-    else if (options & Document::OPTION_ALLOW_NAKED_STRINGS)
+}
+
+void DocumentParser::UpdateKeyword(const char c) // Accumulates an alphanumeric keyword and emits a KEYWORD token on separator or newline.
+{
+    switch (c)
     {
+    case '\r': // Carriage return is ignored
+        break;
+
+    case ' ':
+    case ',':
+    case '\t':
+        // Flush keyword: the token value is the offset of its text in the string pool
+        doc.tokens.push_back({ TokenType::KEYWORD, (float)doc.string_pool.size() });
+        tok.push_back('\0');
+        std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        line_pos++;
+        break;
+
+    case '\n':
+        // Flush keyword
+        doc.tokens.push_back({ TokenType::KEYWORD, (float)doc.string_pool.size() });
+        tok.push_back('\0');
+        std::copy(tok.begin(), tok.end(), std::back_inserter(doc.string_pool));
+        tok.clear();
+        partial_tok_type = PartialToken::NONE;
+        // Break line
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+        break;
+
+    default:
+        if (!isalnum(static_cast<unsigned char>(c))) // cast: <cctype> functions are undefined for negative char values
+        {
+            partial_tok_type = PartialToken::GARBAGE;
+        }
         tok.push_back(c);
-        tok_type = TokenType::STRING;
-        tok_string_naked = true;
+        line_pos++;
+        break;
     }
-    else
+
+    if (partial_tok_type == PartialToken::GARBAGE)
     {
         App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-            fmt::format("{}, line {}: stray character '{}' in boolean", datastream->getName(), line_num, c));
-        FlushToken(doc, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
+            fmt::format("{}, line {}, pos {}: stray character '{}' in keyword", datastream->getName(), line_num, line_pos, c));
     }
 }
 
-void Document::Load(Ogre::DataStreamPtr datastream, BitMask_t options)
+void DocumentParser::UpdateGarbage(const char c) // Collects a malformed token until a separator or newline, then reports and drops it.
+{
+    if (c == '\r') // Carriage return is ignored
+        return;
+    if (c != ' ' && c != ',' && c != '\t' && c != '\n')
+    {
+        tok.push_back(c); // Not a separator: keep collecting so the whole token can be reported
+        line_pos++;
+        return;
+    }
+    // Separator or newline: report the garbage token and discard it
+    tok.push_back('\0');
+    App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
+        fmt::format("{}, line {}, pos {}: discarding garbage token '{}'", datastream->getName(), line_num, line_pos, tok.data()));
+    tok.clear();
+    partial_tok_type = PartialToken::NONE;
+    line_pos++;
+    if (c == '\n') // Break line like every other handler does; it was swallowed before, merging this line with the next
+    {
+        doc.tokens.push_back({ TokenType::LINEBREAK, 0.f });
+        line_num++;
+        line_pos = 0;
+    }
+}
+
+void Document::Load(Ogre::DataStreamPtr datastream, const BitMask_t options) // Clears the document, then tokenizes the whole stream chunk by chunk through a DocumentParser.
 {
     // Reset the document
     tokens.clear();
     string_pool.clear();
 
     // Prepare context
-    const size_t BUF_MAX = 10 * 1024; // 10Kb
-    char buf[BUF_MAX];
-    std::vector<char> tok;
-    RoR::TokenType tok_type = TokenType::NONE;
-    bool tok_number_dot = false;
-    bool tok_string_naked = false;
-    size_t line_num = 0;
+    DocumentParser parser(*this, options, datastream); // Holds the partial-token state across read() chunks
+    const size_t LINE_BUF_MAX = 10 * 1024; // 10Kb; size of one read() chunk
+    char buf[LINE_BUF_MAX];
 
     // Parse the text
     while (!datastream->eof())
     {
-        size_t buf_len = datastream->read(buf, BUF_MAX);
+        size_t buf_len = datastream->read(buf, LINE_BUF_MAX);
         for (size_t i = 0; i < buf_len; i++)
         {
             const char c = buf[i];
 
-            if (c == '\r') // Carriage return character is ignored
-                continue;
-
-            switch (tok_type)
+            switch (parser.partial_tok_type) // Dispatch the character to the handler of the token in progress
             {
-            case TokenType::NONE:
-                if (c == '\n')
-                {
-                    tokens.push_back({ TokenType::LINEBREAK, 0.f });
-                    line_num++;
-                }
-                else if (c != ' ' && c != ',' && c != '\t')
-                {
-                    BeginToken(*this, options, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num, c);
-                }
+            case PartialToken::NONE:
+                parser.BeginToken(c);
                 break;
 
-            case TokenType::COMMENT:
-                if (c == '\n')
-                {
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                    tokens.push_back({ TokenType::LINEBREAK, 0.f });
-                    line_num++;
-                }
-                else
-                {
-                    tok.push_back(c);
-                }
+            case PartialToken::COMMENT_SEMICOLON:
+            case PartialToken::COMMENT_SLASH:
+                parser.UpdateComment(c);
                 break;
 
-            case TokenType::STRING:
-                if (c == '\n')
-                {
-                    tok.push_back('\0');
-                    if (BITMASK_IS_0(options, OPTION_ALLOW_NAKED_STRINGS))
-                    {
-                        App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-                            fmt::format("{}, line {}: accepting unclosed string '{}'", datastream->getName(), line_num, tok.data()));
-                    }
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-
-                    tokens.push_back({ TokenType::LINEBREAK, 0.f });
-                    line_num++;
-                }
-                else if (tok_string_naked && c == ' ' || c == ',' || c == '\t')
-                {
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                }
-                else
-                {
-                    tok.push_back(c);
-                }
+            case PartialToken::STRING_QUOTED:
+            case PartialToken::STRING_NAKED:
+                parser.UpdateString(c);
                 break;
 
-            case TokenType::NUMBER:
-                if (c == '\n')
-                {
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                    tokens.push_back({ TokenType::LINEBREAK, 0.f });
-                    line_num++;
-                }
-                else
-                {
-                    ProcessNumber(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num, c);
-                }
+            case PartialToken::NUMBER:
+            case PartialToken::NUMBER_DOT:
+                parser.UpdateNumber(c);
                 break;
 
-            case TokenType::BOOL:
-                if (c == '\n')
-                {
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                    tokens.push_back({ TokenType::LINEBREAK, 0.f });
-                    line_num++;
-                }
-                else
-                {
-                    ProcessBool(*this, options, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num, c);
-                }
+            case PartialToken::BOOL_TRUE:
+            case PartialToken::BOOL_FALSE:
+                parser.UpdateBool(c);
                 break;
 
-            case TokenType::KEYWORD:
-                if (c == '\n')
-                {
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                    tokens.push_back({ TokenType::LINEBREAK, 0.f });
-                    line_num++;
-                }
-                else if (c == ' ' || c == ',' || c == '\t')
-                {
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                }
-                else if (isalnum(c))
-                {
-                    tok.push_back(c);
-                }
-                else
-                {
-                    App::GetConsole()->putMessage(Console::CONSOLE_MSGTYPE_INFO, Console::CONSOLE_SYSTEM_WARNING,
-                        fmt::format("{}, line {}: stray character '{}' in keyword, parsing as '{}'", datastream->getName(), line_num, c, tok.data()));
-                    FlushToken(*this, tok, tok_type, tok_number_dot, tok_string_naked, datastream, line_num);
-                }
+            case PartialToken::KEYWORD:
+                parser.UpdateKeyword(c);
+                break;
+
+            case PartialToken::GARBAGE:
+                parser.UpdateGarbage(c);
                 break;
             }
         }
     }
+
+    // Ensure newline at end of file. NOTE(review): a token still being accumulated at EOF (no trailing newline) is not flushed here and appears to be dropped.
+    if (tokens.size() == 0 || tokens.back().type != TokenType::LINEBREAK)
+    {
+        tokens.push_back({ TokenType::LINEBREAK, 0.f });
+    }
 }
 
 #if OGRE_PLATFORM == OGRE_PLATFORM_WIN32
diff --git a/source/main/utils/GenericFileFormat.h b/source/main/utils/GenericFileFormat.h
index cf36024e9d..82ddcfbba5 100644
--- a/source/main/utils/GenericFileFormat.h
+++ b/source/main/utils/GenericFileFormat.h
@@ -29,7 +29,7 @@
 /// 
 /// Remarks:
 ///  - Strings cannot be multiline. Linebreak within string ends the string.
-///  - KEYWORD tokens cannot start with a digit.
+///  - KEYWORD tokens cannot start with a digit or special character.
 
 #include <vector>
 #include <string>