#include "Parser.h"

#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <regex>
#include <sstream>

#include "Environment.h"

#include <stdexcept>
#include <string>
#include <vector>

namespace {
using namespace NVL;

// A set of characters. A character belongs to the group when it occurs in `accept`.
struct ParseGroup {
    String accept;
    operator String() const { return accept; }
    bool operator== (const String& other) const { return accept == other; }
};

// A literal character sequence. Single-character matches convert implicitly to
// wchar_t so they can be compared against individual characters of the input.
struct Match {
    String accept;
    operator wchar_t() const {
        if (accept.length() == 1)
            return accept[0];
        // A multi-character match has no single-character form: report it and yield NUL.
        std::wcerr << "Cannot demote Match " << accept << " to char" << std::endl;
        return '\0';
    }
    bool operator== (const String& other) const { return accept == other; }
};

const ParseGroup NUMERIC = { L"1234567890" };
const Match DECIMAL_DOT = { L"." };
const Match NEGATIVE = { L"-" };
const ParseGroup ALPHA = { L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" };
const Match ARRAY_OPEN = { L"[" };
const Match ARRAY_CLOSE = { L"]" };
const Match ARRAY_DELIM = { L"," };
const Match GROUP_OPEN = { L"(" };
const Match GROUP_CLOSE = { L")" };
const Match QUOTE = { L"\"" };
const Match COMMENT_BEGIN = { L"//" };
const Match DIALOGUE_OPEN = { L"<<-" };
const Match DIALOGUE_CLOSE = { L"->>" };
const Match BEGIN = { L"BEGIN" };
const Match END = { L"END" };
const ParseGroup SYMBOL = { ALPHA.accept + NUMERIC.accept + L"_"};
// Operator-like names that are legal symbols even though they are not alphanumeric.
// NOTE(review): L"=?" is listed twice; confirm whether one entry was meant to be another operator.
const Match SPECIAL_SYMBOLS[] = {
    { L"+" },
    { L"-" },
    { L"*" },
    { L"/" },
    { L"=?" },
    { L">?" },
    { L"=?" }
};
const ParseGroup WS = { L" \t\v\f\r\n" };
// Characters that terminate a token: whitespace, brackets, the array delimiter
// and the first character of a comment.
const ParseGroup SEPARATOR = {
    WS.accept +
    wchar_t(ARRAY_OPEN) +
    wchar_t(ARRAY_CLOSE) +
    wchar_t(GROUP_OPEN) +
    wchar_t(GROUP_CLOSE) +
    wchar_t(ARRAY_DELIM) +
    COMMENT_BEGIN.accept[0]
};
const Match NEWLINE = { L"\n" };
// Characters that may follow ESCAPE inside a quoted string.
const ParseGroup ESCAPED = { L"\\\"" };
const Match ESCAPE = { L"\\" };

// Dialogue mode matches
const Match MARKUP_OPEN = { L"[" };
const Match MARKUP_CLOSE = { L"]" };
const Match SPEAKER_OPEN = { L"[" };
const Match SPEAKER_CLOSE = { L"]" };
const Match MARKUP_TEXT_OPEN = { L"{" };
const Match MARKUP_TEXT_CLOSE = { L"}" };
const Match TEMPLATE_IND = { L"$" };
const Match TEMPLATE_OPEN = { L"{" };
const Match TEMPLATE_CLOSE = { L"}" };
const Match COMMAND_ESCAPE = { L"*!" };
const ParseGroup DIALOGUE_ESCAPED_SINGLE = {
    ESCAPE.accept +
    wchar_t(MARKUP_OPEN) +
    wchar_t(MARKUP_CLOSE) +
    wchar_t(MARKUP_TEXT_OPEN) +
    wchar_t(MARKUP_TEXT_CLOSE) +
    // char(SPEAKER_OPEN) +
    // char(SPEAKER_CLOSE) +
    wchar_t(TEMPLATE_IND)
    // char(TEMPLATE_OPEN) +
    // char(TEMPLATE_CLOSE)
};

// Reads the file at `path` and converts its bytes from UTF-8 to a wide string.
// A leading UTF-8 byte order mark is skipped (with a warning on stderr).
// Throws std::runtime_error when the file cannot be opened; the converter's
// from_bytes() reports undecodable input by throwing std::range_error.
std::wstring read_file_to_string(const std::string& path) {
    std::ifstream f(path);
    if (!f.is_open())
        throw std::runtime_error("Could not open file: " + path);

    {
        // Some apps on Windows adds this signature in front of UTF-8 files when saving
        const int a = f.get();
        const int b = f.get();
        const int c = f.get();
        if (a == 0xEF && b == 0xBB && c == 0xBF) {
            std::cerr << "Warning: Windows UTF-8 BOM skipped" << std::endl;
        } else {
            // Files shorter than three bytes leave failbit set, which would make
            // seekg() a no-op and lose the content; reset the state before rewinding.
            f.clear();
            f.seekg(0);
        }
    }

    std::stringstream buffer;
    buffer << f.rdbuf();
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
    return converter.from_bytes(buffer.str());
}

// Splits `str` at every NEWLINE. The newline characters are dropped and the text
// after the last newline (possibly empty) is always the final element.
std::vector<String> split_string_by_lines(const String& str) {
    std::vector<String> lines;
    size_t prev = 0;
    for (size_t pos = str.find(NEWLINE, prev); pos != String::npos; pos = str.find(NEWLINE, prev)) {
        lines.push_back(str.substr(prev, pos - prev));
        prev = pos + 1;
    }
    lines.push_back(str.substr(prev));
    return lines;
}

// True when `str` is an optional leading NEGATIVE followed by digits with at most
// one DECIMAL_DOT, and contains at least one character besides the sign and dot.
inline bool IsNumeric(const String& str) {
    const bool negative = str[0] == NEGATIVE;
    bool had_dot = false;
    for (size_t i = negative ? 1 : 0; i < str.length(); i++) {
        const wchar_t c = str[i];
        if (NUMERIC.accept.find(c) != String::npos)
            continue;
        if (c != DECIMAL_DOT.accept[0] || had_dot)
            return false;
        had_dot = true;
    }
    // Reject tokens made up only of a sign and/or a dot (this also rejects the empty token).
    return static_cast<size_t>(had_dot) + static_cast<size_t>(negative) != str.length();
}

// True when every character of `s` is whitespace (vacuously true for an empty string).
inline bool ContainsOnlyWS(const String& s) {
    for (const auto& c : s) {
        if (WS.accept.find(c) == String::npos)
            return false;
    }
    return true;
}

// Advances `pos` past whitespace; never moves beyond the end of `f`.
void SkipWS(const String& f, size_t& pos) {
    while (pos < f.size() && WS.accept.find(f[pos]) != String::npos)
        pos++;
}

// Advances `pos` to the character after the next newline, or to the end of `f`
// when the current line is the last one and has no terminating newline.
void SkipToNextLine(const String& f, size_t& pos) {
    while (pos < f.size() && f[pos] != NEWLINE.accept[0])
        pos++;
    if (pos < f.size())
        pos++;
}

// Advances `pos` past any run of whitespace and COMMENT_BEGIN line comments.
void SkipComments(const String& f, size_t& pos) {
    SkipWS(f, pos);
    while (f.compare(pos, COMMENT_BEGIN.accept.length(), COMMENT_BEGIN.accept) == 0) {
        SkipToNextLine(f, pos);
        SkipWS(f, pos);
    }
}

// Skips whitespace and then one more character (used to consume a bracket or delimiter).
void SkipOverFirstChar(const String& f, size_t& pos) {
    SkipWS(f, pos);
    if (pos < f.size())
        pos++;
}

// Skips whitespace, then consumes and returns the next token. A token always
// contains its first character (even if that character is itself a separator)
// and extends up to, but not including, the next SEPARATOR character or the end
// of `f`. Returns an empty string when only whitespace remains.
String GetToken(const String& f, size_t& pos) {
    SkipWS(f, pos);
    const size_t start = pos;
    if (pos < f.size()) {
        do {
            ++pos;
        } while (pos < f.size() && SEPARATOR.accept.find(f[pos]) == String::npos);
    }
    return f.substr(start, pos - start);
}

// Same as GetToken, but leaves the caller's position untouched (`pos` is a copy).
String PeekToken(const String& f, size_t pos) {
    return GetToken(f, pos);
}

// A legal symbol is either one of SPECIAL_SYMBOLS, or starts with a letter and
// consists only of SYMBOL characters.
bool IsLegalSymbolName(const String& token) {
    for (const auto& special : SPECIAL_SYMBOLS) {
        if (token == special.accept)
            return true;
    }
    if (ALPHA.accept.find(token[0]) == String::npos)
        return false;
    for (const auto& ch : token) {
        if (SYMBOL.accept.find(ch) == String::npos)
            return false;
    }
    return true;
}

Parse::Object ParseExpression(const String& f, size_t& pos);

// Parses the comma separated members of an array. `pos` must be just past the
// opening bracket; on return it is in front of the closing bracket, which the
// caller consumes. `layer` is not used by this function.
// Throws std::runtime_error when two members are not separated by ARRAY_DELIM.
Parse::Object ParseArray(const String& f, size_t& pos, u32 layer) {
    SkipComments(f, pos);
    std::vector<Parse::Object> array{};
    array.push_back(ParseExpression(f, pos));
    while (PeekToken(f, pos)[0] != ARRAY_CLOSE) {
        if (PeekToken(f, pos)[0] == ARRAY_DELIM)
            SkipOverFirstChar(f, pos);
        else
            throw std::runtime_error("Invalid array member");
        array.push_back(ParseExpression(f, pos));
    }
    return { Parse::Type::Array, array };
}

// Parses a quoted string whose opening quote is the next non-comment character.
// Returns the contents with the ESCAPE character of each escape sequence removed
// and leaves `pos` just past the closing quote.
// Throws std::runtime_error for an escape that is not followed by one of ESCAPED,
// and for a string that reaches a newline or the end of input before closing.
String ParseString(const String& f, size_t& pos) {
    SkipComments(f, pos);
    std::vector<size_t> discards{}; // positions of ESCAPE characters to drop
    const auto start = ++pos; // skip opening quote
    for (;; pos++) {
        if (pos >= f.size())
            throw std::runtime_error("Unclosed String");
        if (f[pos] == QUOTE)
            break;
        if (f[pos] == ESCAPE) {
            // Validate the character AFTER the escape; f[pos + 1] is at worst the
            // string's terminating NUL because pos < f.size() here.
            if (ESCAPED.accept.find(f[pos + 1]) == String::npos)
                throw std::runtime_error("Unrecognized escape sequence");
            // Remember the escape and step onto the escaped character so that it
            // is not interpreted as a closing quote or another escape.
            discards.push_back(pos++);
        } else if (f[pos] == NEWLINE) {
            throw std::runtime_error("Unclosed String");
        }
    }
    auto str = f.substr(start, pos++ - start); // pos++ steps over the closing quote
    for (size_t i = 0; i < discards.size(); i++) {
        // Every earlier erase shifted the remaining text left by one.
        str.erase(discards[i] - start - i, 1);
    }
    return str;
}

// Number of arguments the procedure registered under `key` expects, as reported
// by the environment entry's `length`.
u32 GetProcedureArity(const String& key) {
    return Environment::ENVIRONMENT.get(key).length;
}

// Parses a procedure name followed by as many expressions as the procedure's arity.
// Throws std::runtime_error when the name is not a legal symbol.
Parse::Command ParseCommand(const String& f, size_t& pos) {
    SkipComments(f, pos);
    auto proc = GetToken(f, pos);
    if (!IsLegalSymbolName(proc))
        throw std::runtime_error("Illegal Procedure name");

    Parse::Command c{ Parse::Object{ Parse::Type::Symbol, proc } };
    const u32 arity = GetProcedureArity(proc);
    for (u32 i = 0; i < arity; i++) {
        c.push_back(ParseExpression(f, pos));
    }
    SkipComments(f, pos);
    return c;
}

// Parses one expression: an array, a parenthesised sub-command, a quoted string,
// a number or a symbol. Throws std::runtime_error on unbalanced brackets and on
// tokens that are neither numeric nor legal symbols (including end of input).
Parse::Object ParseExpression(const String& f, size_t& pos) {
    SkipComments(f, pos);
    const auto t = PeekToken(f, pos);
    if (t[0] == ARRAY_OPEN) {
        SkipOverFirstChar(f, pos);
        auto c = ParseArray(f, pos, 0);
        if (PeekToken(f, pos)[0] != ARRAY_CLOSE)
            throw std::runtime_error("Cannot match closing Array");
        SkipOverFirstChar(f, pos);
        return c;
    }
    else if (t[0] == GROUP_OPEN) {
        SkipOverFirstChar(f, pos);
        auto c = ParseCommand(f, pos);
        if (PeekToken(f, pos)[0] != GROUP_CLOSE)
            throw std::runtime_error("Cannot match closing subexpression");
        SkipOverFirstChar(f, pos);
        return Parse::Object{ Parse::Type::Subexpression, c };
    }
    else if (t[0] == GROUP_CLOSE)
        throw std::runtime_error("Cannot match closing subexpression, likely too few arguments");
    else if (t[0] == QUOTE)
        return { Parse::Type::String, ParseString(f, pos) };
    else if (t[0] == ARRAY_CLOSE)
        throw std::runtime_error("Cannot match closing array");
    else {
        auto token = GetToken(f, pos);
        if (IsNumeric(token))
            return { std::stof(token) };
        else if (IsLegalSymbolName(token))
            return { Parse::Type::Symbol, token };
        else
            throw std::runtime_error("Illegal symbol");
    }
}

/*
 * NVL Markup Parsetree (one entry per markup tag)
 *
 * Vec:2
 * - Vec:2 - Num: Markup begin
 * |       |
 * |       - Num: Markup end
 * |
 * - Vec:N - Str: T of Markup
 *         |
 *         OR
 *         |
 *         - Vec:2 - Str: T of Markup
 *                 |
 *                 - Vec: Params
 */
// Extracts `[specifiers]{contents}` markup from `s`.
//
// On return `s` holds the text with every tag replaced by its contents, and the
// result is an array with one entry per tag, shaped as in the tree above. The
// begin/end numbers index into the rewritten `s`. When `s` has no markup it is
// left untouched and an empty array is returned.
Parse::Object MatchMarkup(String& s) {
    // NOTE(review): these are narrow std::regex objects searched over String
    // iterators with std::wsmatch results; confirm this combination is intended.
    static const std::regex typer(R"((?:^|[^\\])\[([^\]]+)\]\s*\{([^\}]+)\})"); // G1 -> Specifiers, G2 -> Contents
    static const std::regex effect(R"(\s*(?:([^,\(\)]+?)\s*\(\s*([^\(\)]+?)\s*\)|([^,\(\)]+?))\s*(?:,\s*|$))"); // G1 & G2 -> Func, G3 -> Attr
    static const std::regex param(R"(([^,]+?)\s*(?:,\s*|$))"); // Comma split of func args

    std::vector<Parse::Object> tags;
    std::wsmatch tags_match;
    std::wsmatch effects_match;
    std::wsmatch params_match;
    String reconstruction{};
    size_t running_offset = 0; // to account for characters removed
    bool has_markup{};

    // Match tags
    String::const_iterator tags_start(s.cbegin());
    while (std::regex_search(tags_start, s.cend(), tags_match, typer)) {
        has_markup = true;

        // The match may or may not include one character in front of the opening
        // bracket (it does not when the `^` alternative matched), so locate the
        // bracket relative to group 1, which always starts right after it.
        const String::const_iterator tag_open = std::prev(tags_match[1].first);

        // Keep the text in front of the tag, then the tag's contents only.
        reconstruction.append(tags_match.prefix().first, tag_open);
        reconstruction.append(tags_match[2].first, tags_match[2].second);

        // Everything from the opening bracket up to the contents was dropped.
        running_offset += tags_match[2].first - tag_open;
        size_t begin = tags_match[2].first - s.cbegin() - running_offset;
        Parse::Object range{
            Parse::Type::Array,
            {
                std::vector<Parse::Object>{
                    { static_cast<Number>(begin) },
                    { static_cast<Number>(begin + (tags_match[2].second - tags_match[2].first)) }
                }
            }
        };

        std::vector<Parse::Object> effects{};
        // Match markup options
        String::const_iterator effects_start(tags_match[1].first);
        while (std::regex_search(effects_start, tags_match[1].second, effects_match, effect)) {
            if (effects_match[3].matched) {
                // Plain attribute without arguments.
                effects.push_back({ Parse::Type::String, effects_match[3].str() });
            }
            else {
                // Function-style specifier: name(arg, arg, ...).
                std::vector<Parse::Object> args;
                String::const_iterator params_start(effects_match[2].first);
                while (std::regex_search(params_start, effects_match[2].second, params_match, param)) {
                    size_t temp = 0;
                    args.push_back(ParseExpression(params_match[1].str() + SEPARATOR.accept[0], temp)); // PeekToken will freak out if I don't do this
                    params_start = params_match.suffix().first;
                }
                effects.push_back({
                    Parse::Type::Array,
                    std::vector<Parse::Object>{
                        { Parse::Type::String, effects_match[1].str() },
                        { Parse::Type::Array, args }
                    }
                });
            }
            effects_start = effects_match.suffix().first;
        }

        // The closing part of the tag after the contents was dropped as well.
        running_offset += tags_match[0].second - tags_match[2].second;
        tags_start = tags_match.suffix().first;
        tags.push_back({
            Parse::Type::Array,
            std::vector<Parse::Object>{ range, { Parse::Type::Array, effects } }
        });
    }

    if (!has_markup)
        return { Parse::Type::Array, std::vector<Parse::Object>{} };

    // Append the text after the last tag. `tags_match` was overwritten by the
    // failed search that ended the loop, so use the saved start of the remainder.
    reconstruction.append(tags_start, s.cend());
    s = reconstruction;
    return { Parse::Type::Array, tags };
}

// Converts one non-empty dialogue line into a command:
//  - a line starting with COMMAND_ESCAPE is parsed as a regular command,
//  - a line of the form [name] becomes SwitchSpeaker "name",
//  - anything else becomes Say "text" <markup tags>.
// Throws std::runtime_error when a line ends with SPEAKER_CLOSE but does not
// start with SPEAKER_OPEN.
Parse::Command ParseDialogue(const String& s) {
    if (s.substr(0, 2) == COMMAND_ESCAPE.accept) {
        size_t dummy = 0;
        // Pad a space towards the end, the helpers do not expect strings to immediately terminate
        return ParseCommand(s.substr(2) + L" ", dummy);
    }

    // assume arity for SwitchSpeaker and Say
    if (s.back() == SPEAKER_CLOSE) {
        if (s.front() == SPEAKER_OPEN) {
            auto name = s.substr(1, s.length() - 2);
            // if (IsLegalSymbolName(name))
            return { { Parse::Type::Symbol, L"SwitchSpeaker" }, { Parse::Type::String, name } };
        }
        else
            throw std::runtime_error("Malformed speaker command");
    }

    String copy{ s };
    Parse::Object tags = MatchMarkup(copy); // THIS WILL MODIFY COPY
    return { { Parse::Type::Symbol, L"Say" }, { Parse::Type::String, copy }, tags };
}

// Parses one scene: BEGIN <name>, followed by commands and dialogue blocks
// (DIALOGUE_OPEN ... DIALOGUE_CLOSE), terminated by END. On return `pos` is at
// the first character after the scene that is neither whitespace nor comment.
// Throws std::runtime_error on a missing BEGIN, an illegal scene name, a
// dialogue block that is never closed, or END directly after DIALOGUE_OPEN.
Parse::Scene ParseScene(const String& f, size_t& pos) {
    SkipComments(f, pos);
    if (!(GetToken(f, pos) == BEGIN.accept))
        throw std::runtime_error("Could not match accept at root");

    auto scene_name = GetToken(f, pos);
    if (!IsLegalSymbolName(scene_name))
        throw std::runtime_error("Illegal Scene name");

    Parse::Scene s{ scene_name };
    bool dialogue_mode = false;
    while (PeekToken(f, pos) != END.accept) {
        if (!dialogue_mode) {
            if (PeekToken(f, pos) == DIALOGUE_OPEN.accept) {
                dialogue_mode = true;
                GetToken(f, pos); // skip DIALOGUE_OPEN
                SkipComments(f, pos);
            }
            else {
                s.append(ParseCommand(f, pos));
            }
        }
        else {
            // The block runs up to the DIALOGUE_CLOSE that starts a line.
            auto end = f.find(NEWLINE.accept + DIALOGUE_CLOSE.accept, pos);
            if (end == String::npos)
                throw std::runtime_error("Dialogue does not terminate");

            auto lines = split_string_by_lines(f.substr(pos, end - pos));
            for (auto& l : lines) {
                if (!l.empty() && !ContainsOnlyWS(l))
                    s.append(ParseDialogue(l));
            }
            dialogue_mode = false;
            pos = end;
            GetToken(f, pos); // skip DIALOGUE_CLOSE
            SkipComments(f, pos);
        }
    }
    if (dialogue_mode)
        throw std::runtime_error("Illegal Scene end");

    GetToken(f, pos); // skip END
    SkipComments(f, pos);
    return s;
}
}

namespace NVL::Parse {
// Each constructor fixes the object's type tag; the tagged constructors throw
// std::runtime_error when the tag does not fit the kind of value supplied.
Object::Object(const Number& v) : type(Type::Number), value(v) { }

Object::Object(Type t, const String& v) : type(t), value(v) {
    if (t != Type::String && t != Type::Symbol)
        throw std::runtime_error("Bad type when constructing object!");
}

Object::Object(Type t, const std::vector<Object>& v) : type(t), value(v) {
    if (t != Type::Array && t != Type::Subexpression)
        throw std::runtime_error("Bad type when constructing object!");
}

Object::Object(Number&& v) : type(Type::Number), value(std::move(v)) { }

Object::Object(Type t, String&& v) : type(t), value(std::move(v)) {
    if (t != Type::String && t != Type::Symbol)
        throw std::runtime_error("Bad type when constructing object!");
}

Object::Object(Type t, std::vector<Object>&& v) : type(t), value(std::move(v)) {
    if (t != Type::Array && t != Type::Subexpression)
        throw std::runtime_error("Bad type when constructing object!");
}

// Reads the script at `path` and parses every scene in it, in file order.
// A file containing only whitespace and comments yields an empty list.
// Throws std::runtime_error when the file cannot be opened or is malformed.
std::vector<Scene> ParseFile(const std::string& path) {
    std::wstring f = read_file_to_string(path);
    std::vector<Scene> list {}; // Vector of scenes which each contain a vector of Parses

    size_t pos = 0;
    SkipComments(f, pos);
    // ParseScene leaves `pos` on the first character of the next scene, so the
    // position must not be advanced any further between scenes.
    while (pos < f.length()) {
        list.push_back(ParseScene(f, pos));
    }
    return list;
}
}