2021-12-17 01:05:38 -05:00
|
|
|
#include "Parser.h"
|
2021-07-07 18:03:41 -04:00
|
|
|
|
2021-12-12 03:41:54 -05:00
|
|
|
#include <fstream>
|
|
|
|
#include <sstream>
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
|
2022-08-23 18:02:15 -04:00
|
|
|
#include <srell.hpp>
|
2022-05-10 02:42:12 -04:00
|
|
|
|
|
|
|
#include <utility>
|
2021-12-12 03:41:54 -05:00
|
|
|
|
2021-12-12 22:20:28 -05:00
|
|
|
#include "Environment.h"
|
2021-12-12 03:41:54 -05:00
|
|
|
|
|
|
|
namespace {
|
2022-08-18 12:17:43 -04:00
|
|
|
using namespace NVL;
|
|
|
|
|
2021-12-12 03:41:54 -05:00
|
|
|
struct ParseGroup {
|
2022-08-27 03:04:39 -04:00
|
|
|
const String accept;
|
2021-12-12 22:20:28 -05:00
|
|
|
|
2022-08-27 03:04:39 -04:00
|
|
|
operator String() const {
|
2021-12-12 22:20:28 -05:00
|
|
|
return accept;
|
|
|
|
}
|
2022-08-27 03:04:39 -04:00
|
|
|
bool operator== (const String& other) const {
|
2021-12-12 22:20:28 -05:00
|
|
|
return accept == other;
|
|
|
|
}
|
2021-12-12 03:41:54 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Match {
|
2022-08-27 03:04:39 -04:00
|
|
|
const String accept;
|
|
|
|
|
|
|
|
operator Char() const {
|
2021-12-17 01:05:38 -05:00
|
|
|
if (accept.length() == 1)
|
|
|
|
return accept[0];
|
|
|
|
else {
|
2022-08-22 02:15:25 -04:00
|
|
|
std::cerr << "NVL: Cannot demote Match " << to_std_string(accept) << " to char" << std::endl;
|
2021-12-17 01:05:38 -05:00
|
|
|
return '\0';
|
|
|
|
}
|
2021-12-12 03:41:54 -05:00
|
|
|
}
|
2022-08-27 03:04:39 -04:00
|
|
|
bool operator== (const String& other) const {
|
2021-12-12 03:41:54 -05:00
|
|
|
return accept == other;
|
2021-05-16 12:59:18 -04:00
|
|
|
}
|
2021-12-12 03:41:54 -05:00
|
|
|
};
|
|
|
|
|
2022-08-27 03:04:39 -04:00
|
|
|
const ParseGroup NUMERIC { u"1234567890" };
|
|
|
|
const Match DECIMAL_DOT { u"." };
|
|
|
|
const Match NEGATIVE { u"-" };
|
|
|
|
const ParseGroup ALPHA { u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" };
|
|
|
|
const Match ARRAY_OPEN { u"[" };
|
|
|
|
const Match ARRAY_CLOSE { u"]" };
|
|
|
|
const Match ARRAY_DELIM { u"," };
|
|
|
|
const Match GROUP_OPEN { u"(" };
|
|
|
|
const Match GROUP_CLOSE { u")" };
|
|
|
|
const Match QUOTE { u"\"" };
|
|
|
|
const Match COMMENT_BEGIN { u"//" };
|
|
|
|
const Match DIALOGUE_OPEN { u"<<-" };
|
|
|
|
const Match DIALOGUE_CLOSE { u"->>" };
|
|
|
|
const Match BEGIN { u"BEGIN" };
|
|
|
|
const Match END { u"END" };
|
|
|
|
const ParseGroup SYMBOL { ALPHA.accept + NUMERIC.accept + u"_"};
|
|
|
|
// Operator-like names that IsLegalSymbolName accepts verbatim, bypassing the
// letter/digit/underscore rule.
const Match SPECIAL_SYMBOLS[] {
	// arithmetic
	{ u"+" }, { u"-" }, { u"*" }, { u"/" },
	// comparison
	{ u"=?" }, { u">?" }, { u"<?" }, { u"<=?" }, { u">=?" }
};
|
2022-08-27 03:04:39 -04:00
|
|
|
// Whitespace characters skipped between tokens.
const ParseGroup WS { u" \t\v\f\r\n" };

// Characters that terminate a token: whitespace, the array and group
// punctuation, the first character of a comment marker, and the NUL character.
const ParseGroup SEPARATOR {
	WS.accept
		+ Char(ARRAY_OPEN)
		+ Char(ARRAY_CLOSE)
		+ Char(GROUP_OPEN)
		+ Char(GROUP_CLOSE)
		+ Char(ARRAY_DELIM)
		+ COMMENT_BEGIN.accept[0]
		+ u'\0'
};
|
2022-08-27 03:04:39 -04:00
|
|
|
const Match NEWLINE { u"\n" };
|
|
|
|
const ParseGroup ESCAPED { u"\\\"" };
|
2021-12-12 03:41:54 -05:00
|
|
|
|
2022-08-27 03:04:39 -04:00
|
|
|
const Match ESCAPE { u"\\" };
|
2021-12-13 14:04:12 -05:00
|
|
|
|
|
|
|
// Dialogue mode matches
|
2022-08-27 03:04:39 -04:00
|
|
|
const Match MARKUP_OPEN { u"[" };
|
|
|
|
const Match MARKUP_CLOSE { u"]" };
|
|
|
|
const Match SPEAKER_OPEN { u"[" };
|
|
|
|
const Match SPEAKER_CLOSE { u"]" };
|
|
|
|
const Match MARKUP_TEXT_OPEN { u"{" };
|
|
|
|
const Match MARKUP_TEXT_CLOSE { u"}" };
|
|
|
|
const Match TEMPLATE_IND { u"$" };
|
|
|
|
const Match TEMPLATE_OPEN { u"{" };
|
|
|
|
const Match TEMPLATE_CLOSE { u"}" };
|
2021-12-13 14:04:12 -05:00
|
|
|
|
2022-08-27 03:04:39 -04:00
|
|
|
const Match COMMAND_ESCAPE { u"*!" };
|
|
|
|
const ParseGroup DIALOGUE_ESCAPED_SINGLE {
|
2021-12-13 14:04:12 -05:00
|
|
|
ESCAPE.accept +
|
2022-08-22 02:15:25 -04:00
|
|
|
Char(MARKUP_OPEN) +
|
|
|
|
Char(MARKUP_CLOSE) +
|
|
|
|
Char(MARKUP_TEXT_OPEN) +
|
|
|
|
Char(MARKUP_TEXT_CLOSE) +
|
|
|
|
// Char(SPEAKER_OPEN) +
|
|
|
|
// Char(SPEAKER_CLOSE) +
|
|
|
|
Char(TEMPLATE_IND)
|
|
|
|
// Char(TEMPLATE_OPEN) +
|
|
|
|
// Char(TEMPLATE_CLOSE)
|
2021-12-13 14:04:12 -05:00
|
|
|
};
|
|
|
|
|
2022-08-22 02:15:25 -04:00
|
|
|
// Reads the whole file at `path` and converts it with to_NVL_string.
//
// A leading UTF-8 byte order mark (EF BB BF) is dropped and reported on stderr.
//
// Throws std::runtime_error when the file cannot be opened.
String read_file_to_string(const std::string& path) {
	std::ifstream f(path);
	if (!f.is_open())
		throw std::runtime_error("Could not open file: " + path);

	{ // Some apps on Windows add this signature in front of UTF-8 files when saving
		char signature[3] = { 0, 0, 0 };
		f.read(signature, 3);
		const bool has_bom =
			f.gcount() == 3 &&
			signature[0] == static_cast<char>(0xEF) &&
			signature[1] == static_cast<char>(0xBB) &&
			signature[2] == static_cast<char>(0xBF);

		if (has_bom) {
			std::cerr << "NVL: Windows UTF-8 BOM skipped" << std::endl;
		}
		else {
			// A file shorter than three bytes leaves eofbit/failbit set, and seekg
			// does nothing on a failed stream, so reset the state before rewinding.
			f.clear();
			f.seekg(0);
		}
	}

	std::stringstream buffer;
	buffer << f.rdbuf();

	return to_NVL_string(buffer.str());
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
std::vector<String> split_string_by_lines(const String& str) {
|
|
|
|
std::vector<String> lines;
|
2022-08-18 12:17:43 -04:00
|
|
|
size_t pos = 0;
|
|
|
|
size_t prev = 0;
|
2022-08-20 22:12:11 -04:00
|
|
|
while ((pos = str.find(NEWLINE, prev)) != String::npos) {
|
2021-12-12 03:41:54 -05:00
|
|
|
lines.push_back(str.substr(prev, pos - prev));
|
|
|
|
prev = pos + 1;
|
2021-05-16 12:59:18 -04:00
|
|
|
}
|
2021-12-12 03:41:54 -05:00
|
|
|
|
|
|
|
lines.push_back(str.substr(prev));
|
|
|
|
return lines;
|
|
|
|
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
inline bool IsNumeric(const String& str) {
|
2021-12-17 01:05:38 -05:00
|
|
|
bool negative = str[0] == NEGATIVE;
|
2021-12-12 03:41:54 -05:00
|
|
|
bool had_dot = false;
|
2021-12-17 01:05:38 -05:00
|
|
|
|
|
|
|
for (auto& c : negative ? str.substr(1) : str) {
|
2022-08-20 22:12:11 -04:00
|
|
|
if (NUMERIC.accept.find(c) == String::npos) {
|
2021-12-12 03:41:54 -05:00
|
|
|
if (c == DECIMAL_DOT.accept[0]) {
|
|
|
|
if (had_dot)
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
had_dot = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
2021-05-19 17:02:57 -04:00
|
|
|
}
|
2021-12-17 01:05:38 -05:00
|
|
|
|
|
|
|
if (had_dot + negative == str.length())
|
|
|
|
return false;
|
|
|
|
|
2021-12-12 03:41:54 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
inline bool ContainsOnlyWS(const String& s) {
|
2021-12-17 01:05:38 -05:00
|
|
|
for (auto& c : s) {
|
2022-08-20 22:12:11 -04:00
|
|
|
if (WS.accept.find(c) == String::npos)
|
2021-12-17 01:05:38 -05:00
|
|
|
return false;
|
2021-12-12 03:41:54 -05:00
|
|
|
}
|
2021-12-17 01:05:38 -05:00
|
|
|
return true;
|
|
|
|
}
|
2021-12-13 14:04:12 -05:00
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
void SkipWS(const String& f, size_t& pos) {
|
|
|
|
while (WS.accept.find(f[pos]) != String::npos)
|
2021-12-12 03:41:54 -05:00
|
|
|
pos++;
|
2021-05-19 17:02:57 -04:00
|
|
|
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
void SkipToNextLine(const String& f, size_t& pos) {
|
2021-12-12 03:41:54 -05:00
|
|
|
while (f[pos] != NEWLINE.accept[0])
|
|
|
|
pos++;
|
|
|
|
pos++;
|
2021-07-07 18:03:41 -04:00
|
|
|
}
|
2021-05-15 17:48:37 -04:00
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
void SkipComments(const String& f, size_t& pos) {
|
2021-12-12 03:41:54 -05:00
|
|
|
SkipWS(f, pos);
|
|
|
|
while (f.substr(pos, 2) == COMMENT_BEGIN.accept) {
|
|
|
|
SkipToNextLine(f, pos);
|
|
|
|
SkipWS(f, pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
void SkipOverFirstChar(const String& f, size_t& pos) {
|
2021-12-12 22:20:28 -05:00
|
|
|
SkipWS(f, pos);
|
|
|
|
pos++;
|
|
|
|
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Reads the next token and advances `pos` to the separator that ends it.
//
// Leading whitespace is skipped. The first character after it always belongs to
// the token, even when it is itself a SEPARATOR character; the token then runs
// up to (not including) the next SEPARATOR character or the end of the input.
//
// Returns an empty String, without reading past the buffer, when nothing but
// whitespace is left.
String GetToken(const String& f, size_t& pos) {
	SkipWS(f, pos);
	const size_t start = pos;
	if (start >= f.length())
		return String{};

	// Begin the separator scan one past the first character (see above)
	const size_t stop = f.find_first_of(SEPARATOR.accept, start + 1);
	pos = (stop == String::npos) ? f.length() : stop;
	return f.substr(start, pos - start);
}

// Same as GetToken, but works on a copy of the position so the caller's
// position is left untouched.
String PeekToken(const String& f, size_t pos) {
	return GetToken(f, pos);
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
bool IsLegalSymbolName(const String& token) {
|
2021-12-17 01:05:38 -05:00
|
|
|
for (const auto& x: SPECIAL_SYMBOLS) {
|
|
|
|
if (token == x.accept)
|
|
|
|
return true;
|
|
|
|
}
|
2022-08-20 22:12:11 -04:00
|
|
|
if (ALPHA.accept.find(token[0]) == String::npos)
|
2021-12-12 03:41:54 -05:00
|
|
|
return false;
|
|
|
|
for (auto& i : token)
|
2022-08-20 22:12:11 -04:00
|
|
|
if (SYMBOL.accept.find(i) == String::npos)
|
2021-12-12 03:41:54 -05:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Defined further down; arrays and expressions are mutually recursive.
Parse::Object ParseExpression(const String& f, size_t& pos);

// Parses the members of an array: one expression, then any number of
// ARRAY_DELIM-separated expressions. Parsing stops in front of ARRAY_CLOSE,
// which is left for the caller to consume.
//
// `layer` is not used by this function.
//
// Throws std::runtime_error("Invalid array member") when a member is followed
// by anything other than ARRAY_DELIM or ARRAY_CLOSE.
Parse::Object ParseArray(const String& f, size_t& pos, u32 layer) {
	SkipComments(f, pos);

	std::vector<Parse::Object> members{};
	members.push_back(ParseExpression(f, pos));

	for (;;) {
		const Char next = PeekToken(f, pos)[0];
		if (next == Char(ARRAY_CLOSE))
			break;
		if (next != Char(ARRAY_DELIM))
			throw std::runtime_error("Invalid array member");

		SkipOverFirstChar(f, pos); // consume the delimiter
		members.push_back(ParseExpression(f, pos));
	}

	return { Parse::Type::Array, members };
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Parses a quoted string literal and returns its contents with escape
// sequences resolved.
//
// After skipping whitespace/comments, the character at `pos` is taken to be the
// opening QUOTE and is skipped without inspection. On return `pos` is just past
// the closing QUOTE.
//
// An ESCAPE character must be followed by one of the ESCAPED characters; the
// ESCAPE is dropped and the following character is kept literally.
//
// Throws std::runtime_error("Unrecognized escape sequence") when ESCAPE is
// followed by anything else, and std::runtime_error("Unclosed String") when a
// NEWLINE or the end of the input is reached before the closing QUOTE.
String ParseString(const String& f, size_t& pos) {
	SkipComments(f, pos);

	++pos; // skip opening quote

	String contents;
	for (;;) {
		if (pos >= f.length())
			throw std::runtime_error("Unclosed String");

		const Char c = f[pos];
		if (c == Char(QUOTE))
			break;
		if (c == Char(NEWLINE))
			throw std::runtime_error("Unclosed String");

		if (c == Char(ESCAPE)) {
			// Validate the character AFTER the escape marker, not the marker itself
			const bool known_escape =
				pos + 1 < f.length() &&
				ESCAPED.accept.find(f[pos + 1]) != String::npos;
			if (!known_escape)
				throw std::runtime_error("Unrecognized escape sequence");
			++pos; // drop the escape marker, keep the escaped character
		}

		contents += f[pos];
		++pos;
	}

	++pos; // skip closing quote
	return contents;
}
|
2022-08-20 22:12:11 -04:00
|
|
|
// Looks `key` up in Environment::ENVIRONMENT and returns the `length` member of
// the entry found there, which ParseCommand uses as the number of arguments to parse.
u32 GetProcedureArity(const String& key) {
	const auto& entry = Environment::ENVIRONMENT.get(key);
	return entry.length;
}
|
2021-05-14 23:17:55 -04:00
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Parses one command: a procedure name followed by as many argument
// expressions as GetProcedureArity reports for that name.
//
// The returned Command starts with a Symbol object holding the procedure name;
// the parsed arguments follow in order. Trailing whitespace and comments are
// consumed before returning.
//
// Throws std::runtime_error("Illegal Procedure name") when the first token is
// not a legal symbol name. Exceptions from argument parsing propagate.
Parse::Command ParseCommand(const String& f, size_t& pos) {
	SkipComments(f, pos);

	const String proc = GetToken(f, pos);
	if (!IsLegalSymbolName(proc))
		throw std::runtime_error("Illegal Procedure name");

	Parse::Command c{ Parse::Object{ Parse::Type::Symbol, proc } };

	// The arity does not change while the arguments are parsed, so look it up once
	const u32 arity = GetProcedureArity(proc);
	for (u32 i = 0; i < arity; ++i) {
		c.push_back(ParseExpression(f, pos));
	}

	SkipComments(f, pos);
	return c;
}
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Parses a single expression, dispatching on the first character of the
// upcoming token:
//   ARRAY_OPEN -> array (members parsed by ParseArray, closing bracket consumed here)
//   GROUP_OPEN -> subexpression wrapping a command (closing paren consumed here)
//   QUOTE      -> string literal
//   otherwise  -> number literal or symbol
//
// Throws std::runtime_error on a stray or missing closing bracket/paren and on
// a token that is neither numeric nor a legal symbol name.
Parse::Object ParseExpression(const String& f, size_t& pos) {
	SkipComments(f, pos);

	const Char head = PeekToken(f, pos)[0];

	if (head == Char(ARRAY_OPEN)) {
		SkipOverFirstChar(f, pos);
		auto array = ParseArray(f, pos, 0);
		if (PeekToken(f, pos)[0] != Char(ARRAY_CLOSE))
			throw std::runtime_error("Cannot match closing Array");
		SkipOverFirstChar(f, pos);
		return array;
	}

	if (head == Char(GROUP_OPEN)) {
		SkipOverFirstChar(f, pos);
		auto command = ParseCommand(f, pos);
		if (PeekToken(f, pos)[0] != Char(GROUP_CLOSE))
			throw std::runtime_error("Cannot match closing subexpression");
		SkipOverFirstChar(f, pos);
		return Parse::Object{ Parse::Type::Subexpression, command };
	}

	if (head == Char(GROUP_CLOSE))
		throw std::runtime_error("Cannot match closing subexpression, likely too few arguments");

	if (head == Char(QUOTE))
		return { Parse::Type::String, ParseString(f, pos) };

	if (head == Char(ARRAY_CLOSE))
		throw std::runtime_error("Cannot match closing array");

	// Plain token: number first, then symbol
	auto token = GetToken(f, pos);
	if (IsNumeric(token))
		return { std::stof(to_std_string(token)) };
	if (IsLegalSymbolName(token))
		return { Parse::Type::Symbol, token };

	throw std::runtime_error("Illegal symbol");
}
|
2021-12-12 03:41:54 -05:00
|
|
|
|
2022-08-28 09:40:27 -04:00
|
|
|
/*
|
2022-05-10 02:42:12 -04:00
|
|
|
* NVL Markup Parsetree
|
|
|
|
*
|
2022-08-28 09:40:27 -04:00
|
|
|
* * - Vec:N - Vec:2 - Str: Str segment contents
|
2022-05-10 02:42:12 -04:00
|
|
|
* |
|
2022-08-28 09:40:27 -04:00
|
|
|
* - Vec:N - Str: T of Markup
|
|
|
|
* |
|
|
|
|
* OR
|
|
|
|
* |
|
|
|
|
* - Vec:2 - Str: T of Markup
|
|
|
|
* |
|
|
|
|
* - Vec: Params
|
2022-05-10 02:42:12 -04:00
|
|
|
*/
|
2022-08-28 09:40:27 -04:00
|
|
|
Parse::Object MatchMarkup(const String& s) {
|
2022-08-23 18:02:15 -04:00
|
|
|
static const srell::basic_regex<Char>
|
2022-08-27 03:04:39 -04:00
|
|
|
typer(uR"((?<!\\)\[([^\]]+)\]\s*\{([^\}]+)\})"), // G1 -> Specifiers, G2 -> Contents
|
2022-08-23 18:02:15 -04:00
|
|
|
effect(uR"(\s*(?:([^,\(\)]+?)\s*\(\s*([^\(\)]+?)\s*\)|([^,\(\)]+?))\s*(?:,\s*|$))"), // G1 & G2 -> Func, G3 -> Attr
|
|
|
|
param(uR"(([^,]+?)\s*(?:,\s*|$))"); // Comma split of func args
|
2022-05-10 02:42:12 -04:00
|
|
|
|
2022-08-27 03:04:39 -04:00
|
|
|
srell::match_results<String::const_iterator> tags_match, effects_match, params_match;
|
2022-05-10 02:42:12 -04:00
|
|
|
|
2022-08-28 09:40:27 -04:00
|
|
|
bool has_markup = false;
|
2022-05-10 02:42:12 -04:00
|
|
|
|
2022-08-21 16:24:13 -04:00
|
|
|
// Match tags
|
2022-08-28 09:40:27 -04:00
|
|
|
std::vector<Parse::Object> segments;
|
2022-08-20 22:12:11 -04:00
|
|
|
String::const_iterator tags_start(s.cbegin());
|
2022-08-23 18:02:15 -04:00
|
|
|
while (srell::regex_search(tags_start, s.cend(), tags_match, typer)) {
|
2022-05-10 02:42:12 -04:00
|
|
|
has_markup = true;
|
|
|
|
|
2022-08-28 09:40:27 -04:00
|
|
|
String before {tags_match.prefix().first, tags_match[0].first};
|
|
|
|
if (!before.empty())
|
|
|
|
segments.push_back({ Parse::Type::Array, {
|
|
|
|
{ Parse::Type::String, before },
|
|
|
|
{ Parse::Type::Array, std::vector<Parse::Object>{} }
|
|
|
|
}});
|
2022-08-18 12:17:43 -04:00
|
|
|
|
2022-08-28 09:40:27 -04:00
|
|
|
String inner = tags_match[2].str(); // markupped
|
2022-05-10 02:42:12 -04:00
|
|
|
|
2022-08-21 16:24:13 -04:00
|
|
|
// Match markup options
|
2022-08-28 09:40:27 -04:00
|
|
|
std::vector<Parse::Object> effects{};
|
2022-08-20 22:12:11 -04:00
|
|
|
String::const_iterator effects_start(tags_match[1].first);
|
2022-08-23 18:02:15 -04:00
|
|
|
while (srell::regex_search(effects_start, tags_match[1].second, effects_match, effect)) {
|
2022-08-28 09:40:27 -04:00
|
|
|
if (effects_match[3].matched) { // no params
|
2022-08-18 12:17:43 -04:00
|
|
|
effects.push_back({ Parse::Type::String, effects_match[3].str() });
|
2022-05-10 02:42:12 -04:00
|
|
|
}
|
2022-08-28 09:40:27 -04:00
|
|
|
else { // no params
|
|
|
|
|
|
|
|
// Comma split
|
2022-08-18 12:17:43 -04:00
|
|
|
std::vector<Parse::Object> args;
|
2022-08-20 22:12:11 -04:00
|
|
|
String::const_iterator params_start(effects_match[2].first);
|
2022-08-23 18:02:15 -04:00
|
|
|
while (srell::regex_search(params_start, effects_match[2].second, params_match, param)) {
|
2022-05-10 02:42:12 -04:00
|
|
|
size_t temp = 0;
|
2022-08-18 12:17:43 -04:00
|
|
|
args.push_back(ParseExpression(params_match[1].str() + SEPARATOR.accept[0], temp)); // PeekToken will freak out if I don't do this
|
2022-05-10 02:42:12 -04:00
|
|
|
params_start = params_match.suffix().first;
|
|
|
|
}
|
2022-08-18 12:17:43 -04:00
|
|
|
effects.push_back({ Parse::Type::Array, std::vector<Parse::Object>{ { Parse::Type::String, effects_match[1].str() }, { Parse::Type::Array, args } } });
|
2022-05-10 02:42:12 -04:00
|
|
|
}
|
|
|
|
effects_start = effects_match.suffix().first;
|
|
|
|
}
|
|
|
|
tags_start = tags_match.suffix().first;
|
2022-08-28 09:40:27 -04:00
|
|
|
segments.push_back({ Parse::Type::Array, std::vector<Parse::Object>{ { Parse::Type::String, inner }, { Parse::Type::Array, effects } } });
|
2022-05-10 02:42:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (has_markup) {
|
2022-08-28 09:40:27 -04:00
|
|
|
String end {tags_match.suffix().first, tags_match.suffix().second};
|
|
|
|
if (!end.empty())
|
|
|
|
segments.push_back({ Parse::Type::Array, {
|
|
|
|
{ Parse::Type::String, end },
|
|
|
|
{ Parse::Type::Array, std::vector<Parse::Object>{} }
|
|
|
|
}});
|
|
|
|
|
|
|
|
return { Parse::Type::Array, segments };
|
2022-05-10 02:42:12 -04:00
|
|
|
}
|
|
|
|
else {
|
2022-08-28 09:40:27 -04:00
|
|
|
return { Parse::Type::Array, std::vector<Parse::Object>{
|
|
|
|
{ Parse::Type::Array, {
|
|
|
|
{ Parse::Type::String, s} , { Parse::Type::Array, std::vector<Parse::Object>{} }
|
|
|
|
} } }
|
|
|
|
};
|
2022-05-10 02:42:12 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Turns one non-empty dialogue line into a command:
//   - a line starting with COMMAND_ESCAPE is parsed as a regular command,
//   - a line ending with SPEAKER_CLOSE must also start with SPEAKER_OPEN and
//     becomes `SwitchSpeaker <text between the brackets>`,
//   - anything else becomes `Say <markup tree of the line>`.
//
// Throws std::runtime_error("Malformed speaker command") for a line that ends
// with SPEAKER_CLOSE without starting with SPEAKER_OPEN.
Parse::Command ParseDialogue(const String& s) {
	if (s.compare(0, 2, COMMAND_ESCAPE.accept) == 0) {
		size_t dummy = 0;
		// Pad a space towards the end, the helpers do not expect strings to immediately terminate
		return ParseCommand(s.substr(2) + u" ", dummy);
	}

	// assume arity for SwitchSpeaker and Say

	if (s.back() == Char(SPEAKER_CLOSE)) {
		if (s.front() != Char(SPEAKER_OPEN))
			throw std::runtime_error("Malformed speaker command");

		auto name = s.substr(1, s.length() - 2);
		// if (IsLegalSymbolName(name))
		return { { Parse::Type::Symbol, u"SwitchSpeaker" }, { Parse::Type::String, name } };
	}

	return { { Parse::Type::Symbol, u"Say" }, MatchMarkup(s) };
}
|
2021-05-15 17:48:37 -04:00
|
|
|
|
2022-08-20 22:12:11 -04:00
|
|
|
// Parses one scene: `BEGIN <name>`, a body, then `END`.
//
// The body alternates between command mode (each entry parsed by ParseCommand)
// and dialogue blocks. A DIALOGUE_OPEN token switches to dialogue mode; the
// block then runs up to the next NEWLINE immediately followed by
// DIALOGUE_CLOSE, and each of its lines that is not empty or whitespace-only is
// parsed by ParseDialogue. Trailing whitespace and comments after END are consumed.
//
// Throws std::runtime_error when BEGIN is missing, the scene name is illegal,
// a dialogue block is not terminated, or END is met right after DIALOGUE_OPEN.
Parse::Scene ParseScene(const String& f, size_t& pos) {
	SkipComments(f, pos);

	if (GetToken(f, pos) != BEGIN.accept)
		throw std::runtime_error("Could not match accept at root");

	auto scene_name = GetToken(f, pos);
	if (!IsLegalSymbolName(scene_name))
		throw std::runtime_error("Illegal Scene name");
	Parse::Scene s{ scene_name };

	bool dialogue_mode = false;

	SkipComments(f, pos);
	for (;;) {
		const String upcoming = PeekToken(f, pos);
		if (upcoming == END.accept)
			break;

		if (dialogue_mode) {
			// The block ends at a newline directly followed by the close marker
			const auto end = f.find(NEWLINE.accept + DIALOGUE_CLOSE.accept, pos);
			if (end == String::npos)
				throw std::runtime_error("Dialogue does not terminate");

			for (const auto& line : split_string_by_lines(f.substr(pos, end - pos))) {
				if (line.empty() || ContainsOnlyWS(line))
					continue;
				s.append(ParseDialogue(line));
			}

			dialogue_mode = false;
			pos = end;
			GetToken(f, pos); // skip DIALOGUE_CLOSE
			SkipComments(f, pos);
		}
		else if (upcoming == DIALOGUE_OPEN.accept) {
			dialogue_mode = true;
			GetToken(f, pos); // skip DIALOGUE_OPEN
			SkipComments(f, pos);
		}
		else {
			s.append(ParseCommand(f, pos));
		}
	}

	if (dialogue_mode)
		throw std::runtime_error("Illegal Scene end");

	GetToken(f, pos); // skip END
	SkipComments(f, pos);
	return s;
}
|
|
|
|
}
|
2021-05-15 17:48:37 -04:00
|
|
|
|
2021-12-17 01:05:38 -05:00
|
|
|
namespace NVL::Parse {
|
2022-08-18 12:17:43 -04:00
|
|
|
|
|
|
|
// --- Copying constructors ---

// Number object.
Object::Object(const Number& v) : type(Type::Number), value(v) {}

// String-valued object; `t` has to be Type::String or Type::Symbol.
Object::Object(Type t, const String& v) : type(t), value(v) {
	const bool string_like = (t == Type::String || t == Type::Symbol);
	if (!string_like) throw std::runtime_error("Bad type when constructing object!");
}

// List-valued object; `t` has to be Type::Array or Type::Subexpression.
Object::Object(Type t, const std::vector<Object>& v) : type(t), value(v) {
	const bool list_like = (t == Type::Array || t == Type::Subexpression);
	if (!list_like) throw std::runtime_error("Bad type when constructing object!");
}

// --- Moving constructors (same type rules as above) ---

Object::Object(Number&& v) : type(Type::Number), value(std::move(v)) {}

Object::Object(Type t, String&& v) : type(t), value(std::move(v)) {
	const bool string_like = (t == Type::String || t == Type::Symbol);
	if (!string_like) throw std::runtime_error("Bad type when constructing object!");
}

Object::Object(Type t, std::vector<Object>&& v) : type(t), value(std::move(v)) {
	const bool list_like = (t == Type::Array || t == Type::Subexpression);
	if (!list_like) throw std::runtime_error("Bad type when constructing object!");
}
|
|
|
|
|
2021-12-17 01:05:38 -05:00
|
|
|
std::vector<Scene> ParseFile(const std::string& path) {
|
2022-08-22 02:15:25 -04:00
|
|
|
String f = read_file_to_string(path);
|
2021-05-14 11:47:54 -04:00
|
|
|
|
2021-12-12 03:41:54 -05:00
|
|
|
std::vector<Scene> list {}; // Vector of scenes which each contain a vector of Parses
|
|
|
|
for (size_t i = 0; i < f.length(); i++) {
|
2021-12-12 22:20:28 -05:00
|
|
|
list.push_back(ParseScene(f, i));
|
2021-05-14 16:51:04 -04:00
|
|
|
}
|
2021-07-16 20:27:25 -04:00
|
|
|
|
2021-12-17 01:05:38 -05:00
|
|
|
return list;
|
2021-07-16 20:27:25 -04:00
|
|
|
}
|
2021-05-14 17:21:10 -04:00
|
|
|
}
|
2021-12-12 03:41:54 -05:00
|
|
|
|