#include "Parser.h"

// NOTE(review): the standard header names and std::vector template arguments were
// missing from the text this file was recovered from; they are reconstructed from
// usage below. Confirm against Parser.h (in particular ParseFile's return type).
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "Environment.h"

namespace {

// A set of characters; a character "matches" the group when it occurs in `accept`.
struct ParseGroup {
    std::string accept;
    operator std::string() const { return accept; }
    bool operator== (const std::string& other) const { return accept == other; }
};

// A literal piece of text. Single-character matches can be demoted to `char`;
// demoting a longer match logs to stderr and yields '\0'.
struct Match {
    std::string accept;
    operator char() const {
        if (accept.length() == 1)
            return accept[0];
        else {
            std::cerr << "Cannot demote Match " << accept << " to char" << std::endl;
            return '\0';
        }
    }
    bool operator== (const std::string& other) const { return accept == other; }
};

const ParseGroup NUMERIC = { "1234567890" };
const Match DECIMAL_DOT = { "." };
const Match NEGATIVE = { "-" };
const ParseGroup ALPHA = { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" };
const Match ARRAY_OPEN = { "[" };
const Match ARRAY_CLOSE = { "]" };
const Match ARRAY_DELIM = { "," };
const Match GROUP_OPEN = { "(" };
const Match GROUP_CLOSE = { ")" };
const Match QUOTE = { "\"" };
const Match COMMENT_BEGIN = { "//" };
const Match DIALOGUE_OPEN = { "<<-" };
const Match DIALOGUE_CLOSE = { "->>" };
const Match BEGIN = { "BEGIN" };
const Match END = { "END" };
const ParseGroup SYMBOL = { ALPHA.accept + NUMERIC.accept + "_"};
// NOTE(review): "=?" is listed twice; one of them may have been meant to be a
// different operator. Left unchanged because the intent is not visible here.
const Match SPECIAL_SYMBOLS[] = { { "+" }, { "-" }, { "*" }, { "/" }, { "=?" }, { ">?" }, { "=?" } };
const ParseGroup WS = { " \t\v\f\r\n" };
// Characters that terminate a token: whitespace, brackets, the array delimiter and
// the first character of a comment.
const ParseGroup SEPARATOR = {
    WS.accept +
    char(ARRAY_OPEN) +
    char(ARRAY_CLOSE) +
    char(GROUP_OPEN) +
    char(GROUP_CLOSE) +
    char(ARRAY_DELIM) +
    COMMENT_BEGIN.accept[0]
};
const Match NEWLINE = { "\n" };
const ParseGroup ESCAPED = { "\\\"" };
const Match ESCAPE = { "\\" };

// Dialogue mode matches
const Match MARKUP_OPEN = { "[" };
const Match MARKUP_CLOSE = { "]" };
const Match SPEAKER_OPEN = { "[" };
const Match SPEAKER_CLOSE = { "]" };
const Match MARKUP_TEXT_OPEN = { "{" };
const Match MARKUP_TEXT_CLOSE = { "}" };
const Match TEMPLATE_IND = { "$" };
const Match TEMPLATE_OPEN = { "{" };
const Match TEMPLATE_CLOSE = { "}" };
const Match COMMAND_ESCAPE = { "*!" };
const ParseGroup DIALOGUE_ESCAPED_SINGLE = {
    ESCAPE.accept +
    char(MARKUP_OPEN) +
    char(MARKUP_CLOSE) +
    char(MARKUP_TEXT_OPEN) +
    char(MARKUP_TEXT_CLOSE) +
    // char(SPEAKER_OPEN) +
    // char(SPEAKER_CLOSE) +
    char(TEMPLATE_IND)
    // char(TEMPLATE_OPEN) +
    // char(TEMPLATE_CLOSE)
};

// Reads the whole file at `path` into a string, dropping a leading UTF-8 BOM.
// Throws std::runtime_error when the file cannot be opened.
std::string read_file_to_string(const std::string& path) {
    std::ifstream f(path);
    if (!f.is_open())
        throw std::runtime_error("Could not open file: " + path);

    {
        // Some apps on Windows adds this signature in front of UTF-8 files when saving
        const int a = f.get();
        const int b = f.get();
        const int c = f.get();
        if (a == 0xEF && b == 0xBB && c == 0xBF) {
            std::cerr << "Warning: Windows UTF-8 BOM skipped" << std::endl;
        } else {
            // A file shorter than three bytes leaves eof/fail bits set, which would
            // make the rewind fail; reset the state first.
            f.clear();
            f.seekg(0);
        }
    }

    std::stringstream buffer;
    buffer << f.rdbuf();
    return buffer.str();
}

// Splits `str` at every '\n'. The newline characters are not kept; the text after
// the last newline (possibly empty) is always the final element.
std::vector<std::string> split_string_by_lines(const std::string& str) {
    std::vector<std::string> lines;
    std::string::size_type prev = 0;
    std::string::size_type pos = 0;
    while ((pos = str.find(NEWLINE.accept[0], prev)) != std::string::npos) {
        lines.push_back(str.substr(prev, pos - prev));
        prev = pos + 1;
    }
    lines.push_back(str.substr(prev));
    return lines;
}

// True when `str` is an optional leading '-', followed by digits with at most one
// '.', and contains at least one digit.
inline bool IsNumeric(const std::string& str) {
    if (str.empty())
        return false;

    const bool negative = str[0] == NEGATIVE.accept[0];
    bool had_dot = false;
    size_t digits = 0;
    for (size_t i = negative ? 1 : 0; i < str.length(); ++i) {
        const char c = str[i];
        if (NUMERIC.accept.find(c) != std::string::npos) {
            ++digits;
        } else if (c == DECIMAL_DOT.accept[0] && !had_dot) {
            had_dot = true;
        } else {
            return false;
        }
    }
    // "-", "." and "-." carry no digits and are not numbers.
    return digits > 0;
}

// True when every character of `s` is whitespace (also true for an empty string).
inline bool ContainsOnlyWS(const std::string& s) {
    return s.find_first_not_of(WS.accept) == std::string::npos;
}

// Advances `pos` past whitespace, never beyond the end of `f`.
void SkipWS(const std::string& f, size_t& pos) {
    while (pos < f.size() && WS.accept.find(f[pos]) != std::string::npos)
        pos++;
}

// Moves `pos` to the character after the next '\n', or to the end of `f` when the
// current line is the last one and has no trailing newline.
void SkipToNextLine(const std::string& f, size_t& pos) {
    const auto nl = f.find(NEWLINE.accept[0], pos);
    pos = (nl == std::string::npos) ? f.size() : nl + 1;
}

// Skips whitespace and any number of `//` line comments.
void SkipComments(const std::string& f, size_t& pos) {
    SkipWS(f, pos);
    while (pos < f.size() &&
           f.compare(pos, COMMENT_BEGIN.accept.length(), COMMENT_BEGIN.accept) == 0) {
        SkipToNextLine(f, pos);
        SkipWS(f, pos);
    }
}

// Skips whitespace and then one more character (if any is left).
void SkipOverFirstChar(const std::string& f, size_t& pos) {
    SkipWS(f, pos);
    if (pos < f.size())
        pos++;
}

// Reads the next token and advances `pos` past it. After skipping whitespace, the
// first character always belongs to the token (even if it is a separator); the
// token then extends up to the next separator or the end of `f`.
// Returns an empty string when only whitespace is left.
std::string GetToken(const std::string& f, size_t& pos) {
    SkipWS(f, pos);
    if (pos >= f.size())
        return {};

    const auto start = pos;
    const auto stop = f.find_first_of(SEPARATOR.accept, start + 1);
    pos = (stop == std::string::npos) ? f.size() : stop;
    return f.substr(start, pos - start);
}

// Same as GetToken, but works on a copy of `pos`, so the caller's position is
// left untouched.
std::string PeekToken(const std::string& f, size_t pos) {
    return GetToken(f, pos);
}

// A legal symbol is either one of SPECIAL_SYMBOLS, or starts with a letter and
// consists only of letters, digits and '_'.
bool IsLegalSymbolName(const std::string& token) {
    for (const auto& x : SPECIAL_SYMBOLS) {
        if (token == x.accept)
            return true;
    }
    if (token.empty())
        return false;
    if (ALPHA.accept.find(token[0]) == std::string::npos)
        return false;
    return token.find_first_not_of(SYMBOL.accept) == std::string::npos;
}

NVL::Parse::Object ParseExpression(const std::string& f, size_t& pos);

// Parses the members of an array up to, but not including, the closing ']'.
// At least one member is required; members are separated by ','.
// Throws std::runtime_error("Invalid array member") on anything else.
NVL::Parse::Object ParseArray(const std::string& f, size_t& pos, int layer) {
    SkipComments(f, pos);
    std::vector<NVL::Parse::Object> array{};
    array.push_back(ParseExpression(f, pos));
    while (PeekToken(f, pos)[0] != ARRAY_CLOSE) {
        if (PeekToken(f, pos)[0] == ARRAY_DELIM)
            SkipOverFirstChar(f, pos);
        else
            throw std::runtime_error("Invalid array member");
        array.push_back(ParseExpression(f, pos));
    }
    return { NVL::Parse::Type::Array, array };
}

// Parses a double-quoted string starting at the opening quote and leaves `pos`
// after the closing quote. A backslash may only be followed by a character from
// ESCAPED (backslash or quote); the backslash itself is removed from the result.
// Throws std::runtime_error on an unknown escape, or when the string is not
// closed before the end of the line / input.
std::string ParseString(const std::string& f, size_t& pos) {
    SkipComments(f, pos);
    std::vector<size_t> discards{};  // positions of backslashes to drop
    const auto start = ++pos;        // skip opening quote

    for (;; ++pos) {
        if (pos >= f.size())
            throw std::runtime_error("Unclosed String");

        const char c = f[pos];
        if (c == QUOTE.accept[0]) {
            break;
        } else if (c == ESCAPE.accept[0]) {
            // Validate the character AFTER the backslash, then step onto it so
            // the loop increment moves past the escaped character.
            if (pos + 1 < f.size() && ESCAPED.accept.find(f[pos + 1]) != std::string::npos)
                discards.push_back(pos++);
            else
                throw std::runtime_error("Unrecognized escape sequence");
        } else if (c == NEWLINE.accept[0]) {
            throw std::runtime_error("Unclosed String");
        }
    }

    auto str = f.substr(start, pos - start);
    ++pos;  // skip closing quote

    // Each erase shifts the remaining text left by one, hence the `- i`.
    for (size_t i = 0; i < discards.size(); ++i) {
        str.erase(discards[i] - start - i, 1);
    }
    return str;
}

// Number of arguments the procedure named `key` takes, as recorded in the environment.
unsigned GetProcedureArity(const std::string& key) {
    return NVL::Environment::ENVIRONMENT.get(key).length;
}

// Parses `procedure arg1 ... argN`, where N is the procedure's arity.
// Throws std::runtime_error("Illegal Procedure name") for an invalid name.
NVL::Parse::Command ParseCommand(const std::string& f, size_t& pos) {
    SkipComments(f, pos);
    auto proc = GetToken(f, pos);
    if (!IsLegalSymbolName(proc))
        throw std::runtime_error("Illegal Procedure name");

    NVL::Parse::Command c{ NVL::Parse::Object{ NVL::Parse::Type::Symbol, proc } };
    const unsigned arity = GetProcedureArity(proc);
    for (unsigned i = 0; i < arity; i++) {
        c.push_back(ParseExpression(f, pos));
    }
    SkipComments(f, pos);
    return c;
}

// Parses one expression: an array `[...]`, a subexpression `(...)`, a quoted
// string, a number or a symbol. Throws std::runtime_error on malformed input.
NVL::Parse::Object ParseExpression(const std::string& f, size_t& pos) {
    SkipComments(f, pos);
    auto t = PeekToken(f, pos);
    if (t[0] == ARRAY_OPEN) {
        SkipOverFirstChar(f, pos);
        auto c = ParseArray(f, pos, 0);
        if (PeekToken(f, pos)[0] != ARRAY_CLOSE)
            throw std::runtime_error("Cannot match closing Array");
        else
            SkipOverFirstChar(f, pos);
        return c;
    }
    else if (t[0] == GROUP_OPEN) {
        SkipOverFirstChar(f, pos);
        auto c = ParseCommand(f, pos);
        if (PeekToken(f, pos)[0] != GROUP_CLOSE)
            throw std::runtime_error("Cannot match closing subexpression");
        else
            SkipOverFirstChar(f, pos);
        return NVL::Parse::Object{ NVL::Parse::Type::Subexpression, c };
    }
    else if (t[0] == GROUP_CLOSE)
        throw std::runtime_error("Cannot match closing subexpression, likely too few arguments");
    else if (t[0] == QUOTE)
        return { NVL::Parse::Type::String, ParseString(f, pos) };
    else if (t[0] == ARRAY_CLOSE)
        throw std::runtime_error("Cannot match closing array");
    else {
        auto token = GetToken(f, pos);
        if (IsNumeric(token))
            return { NVL::Parse::Type::Number, std::stof(token) };
        else if (IsLegalSymbolName(token))
            return { NVL::Parse::Type::Symbol, token };
        else
            throw std::runtime_error("Illegal symbol");
    }
}

// Turns one dialogue line into a command:
//   - a line starting with COMMAND_ESCAPE ("*!") is parsed as a regular command;
//   - "[name]" with a legal symbol name becomes `SwitchSpeaker name`;
//   - a line ending in ']' but not starting with '[' is rejected;
//   - anything else becomes `Say line`.
NVL::Parse::Command ParseDialogue(const std::string& s) {
    if (s.substr(0, 2) == COMMAND_ESCAPE.accept) {
        size_t dummy = 0;
        // Pad a space towards the end, the helpers do not expect strings to immediately terminate
        return ParseCommand(s.substr(2) + " ", dummy);
    }

    // assume SwitchSpeaker and Say are unary for now
    if (!s.empty() && s.back() == SPEAKER_CLOSE) {
        if (s.front() == SPEAKER_OPEN) {
            auto name = s.substr(1, s.length() - 2);
            if (IsLegalSymbolName(name))
                return { { NVL::Parse::Type::Symbol, "SwitchSpeaker" }, { NVL::Parse::Type::String, name } };
        }
        else throw std::runtime_error("Malformed speaker command");
    }

    return { { NVL::Parse::Type::Symbol, "Say" }, { NVL::Parse::Type::String, s } };
}

// Parses `BEGIN name ... END`. The body is a sequence of commands and dialogue
// blocks; a dialogue block starts with the token "<<-" and ends at a line that
// begins with "->>". On return `pos` is at the next non-comment character.
NVL::Parse::Scene ParseScene(const std::string& f, size_t& pos) {
    SkipComments(f, pos);
    if (!(GetToken(f, pos) == BEGIN.accept))
        throw std::runtime_error("Could not match accept at root");
    auto scene_name = GetToken(f, pos);
    if (!IsLegalSymbolName(scene_name))
        throw std::runtime_error("Illegal Scene name");
    NVL::Parse::Scene s{ scene_name };

    const std::string dialogue_terminator = NEWLINE.accept + DIALOGUE_CLOSE.accept;
    while (PeekToken(f, pos) != END.accept) {
        if (PeekToken(f, pos) == DIALOGUE_OPEN.accept) {
            // The whole dialogue block is consumed here, so dialogue text is never
            // compared against END by the loop condition.
            GetToken(f, pos); // skip DIALOGUE_OPEN
            const auto body_begin = pos;
            SkipComments(f, pos);

            // Search from right after DIALOGUE_OPEN so that a block containing only
            // whitespace/comments still finds its own terminator.
            const auto end = f.find(dialogue_terminator, body_begin);
            if (end == std::string::npos)
                throw std::runtime_error("Dialogue does not terminate");

            if (pos < end) {
                for (const auto& l : split_string_by_lines(f.substr(pos, end - pos))) {
                    if (!l.empty() && !ContainsOnlyWS(l))
                        s.append(ParseDialogue(l));
                }
            }

            pos = end;
            GetToken(f, pos); // skip DIALOGUE_CLOSE
            SkipComments(f, pos);
        }
        else {
            s.append(ParseCommand(f, pos));
        }
    }

    GetToken(f, pos); // skip END
    SkipComments(f, pos);
    return s;
}

}

namespace NVL::Parse {
    // Parses every scene in the file at `path`, in order of appearance.
    // Throws std::runtime_error when the file cannot be opened or is malformed.
    std::vector<Scene> ParseFile(const std::string& path) {
        const std::string f = read_file_to_string(path);
        std::vector<Scene> list {}; // Vector of scenes which each contain a vector of Parses

        size_t pos = 0;
        SkipComments(f, pos);
        // ParseScene leaves `pos` on the first character of the next scene, so the
        // position must not be advanced any further between scenes.
        while (pos < f.length()) {
            list.push_back(ParseScene(f, pos));
        }
        return list;
    }
}