Nuked everything, REAL soulless lexer implemented
This commit is contained in:
parent
ecad7b3817
commit
e5759de673
7 changed files with 383 additions and 552 deletions
|
@ -4,6 +4,6 @@
|
|||
cmake_minimum_required (VERSION 3.8)
|
||||
|
||||
# Add source to this project's executable.
|
||||
add_executable (NouVeL "NouVeL.cpp" "NVL.cpp" "NVL.h")
|
||||
add_executable (NouVeL "NouVeL.cpp" "NVL.cpp" "NVL.h" "SymbolConfig.h")
|
||||
|
||||
# TODO: Add tests and install targets if needed.
|
||||
|
|
778
NouVeL/NVL.cpp
778
NouVeL/NVL.cpp
|
@ -1,514 +1,318 @@
|
|||
#include <map>
|
||||
#include "NVL.h"
|
||||
#include "SymbolConfig.h"
|
||||
#include <unordered_map>
|
||||
|
||||
namespace {
|
||||
// general helpers, may move these into a different translation unit
|
||||
bool str_test_every_char_not(int (*test)(int), std::string str)
|
||||
{
|
||||
for (auto& x : str)
|
||||
{
|
||||
if (!(*test)(x))
|
||||
return false;
|
||||
namespace NVL {
|
||||
std::unordered_map<char, Char_Type> intialize_map() {
|
||||
std::unordered_map<char, Char_Type> Char_Map;
|
||||
for (char c : "abcdefghijklmnopqrstuvwxyz") {
|
||||
Char_Map[c] = Char_Type::LOWERCASE;
|
||||
}
|
||||
return true;
|
||||
for (char c : "ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
|
||||
Char_Map[c] = Char_Type::UPPERCASE;
|
||||
}
|
||||
for (char c : "1234567890") {
|
||||
Char_Map[c] = Char_Type::NUMERIC;
|
||||
}
|
||||
for (char c : "<{([") {
|
||||
Char_Map[c] = Char_Type::DELIM_BEG;
|
||||
}
|
||||
for (char c : ">})]") {
|
||||
Char_Map[c] = Char_Type::DELIM_END;
|
||||
}
|
||||
for (char c : " \t") {
|
||||
Char_Map[c] = Char_Type::WHITESPACE;
|
||||
}
|
||||
for (char c : "\'\"") {
|
||||
Char_Map[c] = Char_Type::QUOTE;
|
||||
}
|
||||
Char_Map['\n'] = Char_Type::RETURN;
|
||||
Char_Map['.'] = Char_Type::DOT;
|
||||
Char_Map['_'] = Char_Type::UNDERSCORE;
|
||||
return Char_Map;
|
||||
}
|
||||
void indent_loop(int indent)
|
||||
{
|
||||
for (int i = 0; i < indent; i++)
|
||||
{
|
||||
std::cout << "\t";
|
||||
|
||||
const auto Char_Map = intialize_map();
|
||||
|
||||
// Classifies one character by looking it up in the file-level Char_Map table.
// The lookup uses unordered_map::at, so a character that has no entry in
// Char_Map raises std::out_of_range instead of returning a fallback class.
Char_Type determine_type(char c) {
	const Char_Type& classified = Char_Map.at(c);
	return classified;
}
|
||||
|
||||
// Non-throwing variant of the character classifier: characters that have no
// entry in Char_Map are reported as Char_Type::LOWERCASE instead of raising.
Char_Type determine_type_escaped(char c) {
	const auto entry = Char_Map.find(c);
	return entry == Char_Map.end() ? Char_Type::LOWERCASE : entry->second;
}
|
||||
|
||||
// Builds a single-character delimiter token.
//
// c    - the delimiter character; it always becomes the token's value.
// type - the character class the caller determined for c. Only DELIM_BEG and
//        DELIM_END are meaningful here.
//
// The token type is derived from c: '<' '{' '[' open a dialogue / sequence /
// list, '>' '}' ']' close them. Any other combination (including '(' and ')',
// whose cases are still commented out) is reported on std::cout because a
// constructor cannot return an error code. In that case the token type is left
// at Token_Type::STANDBY so that later reads of `type` are well defined
// (previously the member was left uninitialized on every failure path).
Token::Token(char c, Char_Type type) : value(1, c), type(Token_Type::STANDBY) {
	bool recognised = true;

	// NOTE: inside the body `type` names the parameter; the member must be
	// reached through this->type.
	switch (type) {
	case Char_Type::DELIM_BEG:
		switch (c) {
		case '<':
			this->type = Token_Type::DIALOGUE_BEG;
			break;
		case '{':
			this->type = Token_Type::SEQUENCE_BEG;
			break;
		// case '(':
		//	break;
		case '[':
			this->type = Token_Type::LIST_BEG;
			break;
		default:
			recognised = false;
		}
		break;
	case Char_Type::DELIM_END:
		switch (c) {
		case '>':
			this->type = Token_Type::DIALOGUE_END;
			break;
		case '}':
			this->type = Token_Type::SEQUENCE_END;
			break;
		// case ')':
		//	break;
		case ']':
			this->type = Token_Type::LIST_END;
			break;
		default:
			recognised = false;
		}
		break;
	default:
		recognised = false;
	}

	if (!recognised)
		std::cout << "Failed to construct token: " << c << '\n';
}
|
||||
// Returns how many '\n' characters lie before the stream's current read
// position (i.e. the zero-based index of the line the read head is on).
//
// The stream is rewound and re-read up to the saved position, so on return the
// read head is back where it was. The stream's state flags are also restored.
//
// Fixes over the previous version:
//  * the newline test ran one character behind the read, so a '\n' directly
//    before the position was never counted;
//  * when the stream was already in an eof/fail state (the situation in which
//    the parser reports an unmatched scope) tellg() returned -1 and the result
//    was always 0. The flags are now cleared for the duration of the scan.
int query_ifstream_line_number(std::ifstream& stream)
{
	const std::ios_base::iostate entry_state = stream.rdstate();
	stream.clear(); // tellg()/seekg() do nothing useful while failbit is set

	const std::streampos target = stream.tellg();
	int newline_count = 0;

	if (target != std::streampos(-1))
	{
		stream.seekg(0);
		char c{};
		while (stream.tellg() < target && stream.get(c))
		{
			if (c == '\n')
				++newline_count;
		}
	}

	// hand the stream back with the same flags the caller saw
	stream.clear(entry_state);
	return newline_count;
}
|
||||
|
||||
// facilities for scopers
|
||||
// Lookup table: scope-opening character -> the character that closes it.
// Quotes open and close with the same character.
const std::map<char, char> SCOPER_MAP{
	{ '{', '}' },   // curly braces
	{ '<', '>' },   // angle brackets
	{ '(', ')' },   // round brackets
	{ '[', ']' },   // square brackets

	{ '\'', '\'' }, // single quote
	{ '"', '"' },   // double quote
};
|
||||
NVLError tokenize(std::string file, std::vector<Token>& tokens) {
|
||||
Token_Type state = Token_Type::STANDBY;
|
||||
|
||||
// Tells whether c is one of the two quote characters (' or "), i.e. a
// character that a preceding backslash is expected to escape.
bool need_escape(char c)
{
	switch (c)
	{
	case '\'':
	case '"':
		return true;
	default:
		return false;
	}
}
|
||||
std::string current;
|
||||
|
||||
struct ScopeContext {
|
||||
std::vector<char> Scope;
|
||||
};
|
||||
int current_line = 1;
|
||||
|
||||
std::streampos scope_cope(char scope_char, std::ifstream& stream)
|
||||
{
|
||||
// important
|
||||
// scope_cope() expects the scope opener to already have been fetched and stored in scope_char
|
||||
// i.o.w. it wont be able to cope properly because there will be one extra level of scope that it cannot match
|
||||
for (int i = 0; i < file.length(); i++ ) {
|
||||
switch (state) {
|
||||
case Token_Type::STANDBY:
|
||||
current = "";
|
||||
|
||||
std::streampos initial = stream.tellg();
|
||||
|
||||
std::streampos final;
|
||||
char c{};
|
||||
|
||||
ScopeContext current_scope;
|
||||
current_scope.Scope.push_back(scope_char);
|
||||
|
||||
while (!current_scope.Scope.empty()) {
|
||||
stream.get(c);
|
||||
|
||||
// push scope if opening scoper is found
|
||||
bool set_scope_on_this_iter = false;
|
||||
// there cannot be any more scope inside of quoted strings
|
||||
if (current_scope.Scope.back() != '\'' && current_scope.Scope.back() != '\"') {
|
||||
for (auto const& x : SCOPER_MAP)
|
||||
{
|
||||
if (c == x.first) {
|
||||
current_scope.Scope.push_back(c);
|
||||
set_scope_on_this_iter = true;
|
||||
break;
|
||||
}
|
||||
switch (determine_type(file[i])) {
|
||||
case Char_Type::LOWERCASE:
|
||||
case Char_Type::UPPERCASE:
|
||||
case Char_Type::UNDERSCORE:
|
||||
state = Token_Type::ID;
|
||||
current += file[i];
|
||||
break;
|
||||
case Char_Type::NUMERIC:
|
||||
state = Token_Type::INT;
|
||||
current += file[i];
|
||||
break;
|
||||
case Char_Type::DELIM_BEG:
|
||||
tokens.push_back(Token(file[i], Char_Type::DELIM_BEG));
|
||||
break;
|
||||
case Char_Type::DELIM_END:
|
||||
tokens.push_back(Token(file[i], Char_Type::DELIM_END));
|
||||
break;
|
||||
case Char_Type::RETURN:
|
||||
tokens.push_back(Token('\n', Token_Type::RETURN));
|
||||
current_line++;
|
||||
break;
|
||||
case Char_Type::WHITESPACE:
|
||||
break;
|
||||
case Char_Type::SEMICOLON:
|
||||
state = Token_Type::COMMENT;
|
||||
break;
|
||||
case Char_Type::QUOTE:
|
||||
state = Token_Type::STRING;
|
||||
tokens.push_back(Token(file[i], Token_Type::QUOTE));
|
||||
break;
|
||||
case Char_Type::DOT:
|
||||
state = Token_Type::FLOAT;
|
||||
current += file[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// pop last scope if ending scoper is found
|
||||
// set_scope_on_this_iter to prevent immediately popping scopers that have the same closers and openers
|
||||
if (!set_scope_on_this_iter && c == SCOPER_MAP.at(current_scope.Scope.back()))
|
||||
current_scope.Scope.pop_back();
|
||||
|
||||
if (c == '\\') // encounters an escaped sequence
|
||||
{
|
||||
char cc = stream.peek();
|
||||
if (need_escape(cc)) // see if the escaped char happens to be a scoper
|
||||
{
|
||||
stream.get(c); // skip it, since whoever wrote the script escaped it
|
||||
break;
|
||||
case Token_Type::ID:
|
||||
switch (determine_type(file[i])) {
|
||||
case Char_Type::LOWERCASE:
|
||||
case Char_Type::UPPERCASE:
|
||||
case Char_Type::NUMERIC:
|
||||
case Char_Type::UNDERSCORE:
|
||||
current += file[i];
|
||||
break;
|
||||
case Char_Type::DELIM_BEG:
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
case Char_Type::DELIM_END:
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token(file[i], Char_Type::DELIM_END));
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::RETURN:
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token('\n', Token_Type::RETURN));
|
||||
current_line++;
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::WHITESPACE:
|
||||
tokens.push_back(Token(current, state));
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::SEMICOLON:
|
||||
case Char_Type::QUOTE:
|
||||
case Char_Type::DOT:
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
}
|
||||
}
|
||||
|
||||
// cannot match all the scopes and has read to the end of the file
|
||||
if (stream.eof())
|
||||
throw std::runtime_error("Can't cope with scope at line " + std::to_string(query_ifstream_line_number(stream)));
|
||||
|
||||
}
|
||||
|
||||
final = stream.tellg();
|
||||
stream.seekg(initial);
|
||||
|
||||
return final;
|
||||
}
|
||||
|
||||
// facilities for parsing
|
||||
// Advances the read head past any leading whitespace (spaces, tabs, newlines)
// so the next read starts at the first non-whitespace character.
void skip_ws(std::ifstream& stream)
{
	std::ws(stream);
}
|
||||
|
||||
bool only_ws_before_next_newline(std::ifstream& stream) {
|
||||
if (!isspace(stream.peek()))
|
||||
return false;
|
||||
|
||||
std::streampos initial = stream.tellg();
|
||||
char c{};
|
||||
stream.get(c);
|
||||
while (c != '\n')
|
||||
{
|
||||
stream.get(c);
|
||||
if (!isspace(c)) {
|
||||
stream.seekg(initial);
|
||||
return false;
|
||||
break;
|
||||
case Token_Type::INT:
|
||||
switch (determine_type(file[i])) {
|
||||
case Char_Type::LOWERCASE:
|
||||
case Char_Type::UPPERCASE:
|
||||
case Char_Type::UNDERSCORE:
|
||||
case Char_Type::DELIM_BEG:
|
||||
case Char_Type::SEMICOLON:
|
||||
case Char_Type::QUOTE:
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
case Char_Type::NUMERIC:
|
||||
current += file[i];
|
||||
break;
|
||||
case Char_Type::DELIM_END:
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token(file[i], Char_Type::DELIM_END));
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::RETURN:
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token('\n', Token_Type::RETURN));
|
||||
current_line++;
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::WHITESPACE:
|
||||
tokens.push_back(Token(current, state));
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::DOT:
|
||||
state = Token_Type::FLOAT;
|
||||
current += file[i];
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Token_Type::FLOAT:
|
||||
switch (determine_type(file[i])) {
|
||||
case Char_Type::LOWERCASE:
|
||||
case Char_Type::UPPERCASE:
|
||||
case Char_Type::UNDERSCORE:
|
||||
case Char_Type::SEMICOLON:
|
||||
case Char_Type::QUOTE:
|
||||
case Char_Type::DOT:
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
case Char_Type::NUMERIC:
|
||||
current += file[i];
|
||||
break;
|
||||
case Char_Type::DELIM_BEG:
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
case Char_Type::DELIM_END:
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token(file[i], Char_Type::DELIM_END));
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::RETURN:
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token('\n', Token_Type::RETURN));
|
||||
current_line++;
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
case Char_Type::WHITESPACE:
|
||||
tokens.push_back(Token(current, state));
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Token_Type::STRING:
|
||||
if (determine_type_escaped(file[i]) == Char_Type::RETURN)
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
if (determine_type_escaped(file[i]) == Char_Type::QUOTE && current.back() != '\\') {
|
||||
tokens.push_back(Token(current, state));
|
||||
tokens.push_back(Token('\"', Token_Type::QUOTE));
|
||||
state = Token_Type::STANDBY;
|
||||
}
|
||||
else {
|
||||
current += file[i];
|
||||
}
|
||||
break;
|
||||
case Token_Type::DIALOGUE:
|
||||
// if (determine_type(file[i]) == Char_Type::)
|
||||
;
|
||||
break;
|
||||
case Token_Type::COMMENT:
|
||||
if (determine_type_escaped(file[i]) == Char_Type::RETURN) {
|
||||
tokens.push_back(Token('\n', Token_Type::RETURN));
|
||||
current_line++;
|
||||
}
|
||||
state = Token_Type::STANDBY;
|
||||
break;
|
||||
default:
|
||||
return { ErrCode::PARSE_ERROR, current_line };
|
||||
}
|
||||
}
|
||||
|
||||
stream.seekg(initial);
|
||||
return true;
|
||||
return { ErrCode::SUCCESS, -1 };
|
||||
}
|
||||
|
||||
std::string read_sequence_name(std::ifstream& nvl)
|
||||
{
|
||||
// this function will move the reading head of nvl, this is hard to keep track of but simplifies the program
|
||||
skip_ws(nvl);
|
||||
std::string token;
|
||||
char c{};
|
||||
NVLError construct_tree(std::vector<Token> tokens, ASTNode& root) {
|
||||
Context root_context;
|
||||
ASTNode* state = &root;
|
||||
int current_line = 0;
|
||||
|
||||
nvl.get(c);
|
||||
while (c != ' ' && c != '{' && c != '\n')
|
||||
{
|
||||
token += c;
|
||||
nvl.get(c);
|
||||
}
|
||||
nvl.putback(c);
|
||||
for (int i = 0; i < tokens.size(); i++) {
|
||||
switch (tokens[i].type) {
|
||||
case Token_Type::RETURN:
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
|
||||
// Skips any run of ';' line comments that starts at the read head, together
// with the whitespace that follows each of them. Does nothing when the next
// character is not ';'.
//
// nvl - stream to advance.
// c   - scratch character owned by the caller; receives the last character
//       that was consumed (the terminating '\n' for a complete comment line).
//
// Previously the inner loop tested only `c != '\n'`, so a comment on the last
// line of a file without a trailing newline spun forever once get() started
// failing. The loop now also stops when the stream runs out. The recursion for
// consecutive comment lines is replaced by an outer loop.
void skip_comment(std::ifstream& nvl, char& c)
{
	while (nvl.peek() == ';')
	{
		// swallow the rest of the comment line, newline included
		while (nvl.get(c) && c != '\n')
		{
		}

		// same effect as skip_ws(): move to the next non-blank character
		nvl >> std::ws;
	}
}
|
||||
}
|
||||
|
||||
namespace NVL
|
||||
{
|
||||
void parse_Dialogue(Context old_context, std::ifstream& nvl)
|
||||
{
|
||||
if (std::get<Call*>(old_context.Scope_Hierarchy.back())->Objects.size() != 1)
|
||||
throw std::runtime_error("Failed to parse dialogue at line " + std::to_string(query_ifstream_line_number(nvl)));
|
||||
|
||||
const Object say_command = Object("Say");
|
||||
|
||||
Object speaker = std::get<Call*>(old_context.Scope_Hierarchy.back())->Objects.front(); // Copy speaker object
|
||||
|
||||
Sequence* parent_sequence = std::get<Sequence*>(old_context.Scope_Hierarchy.rbegin()[1]);
|
||||
// old call has never been pushed, we need to exit parse_Call right away when we finish parsing the dialogue, because this already deals with calls
|
||||
|
||||
char c{};
|
||||
nvl.get(c); // get <
|
||||
std::streampos end = scope_cope('<', nvl);
|
||||
|
||||
skip_ws(nvl);
|
||||
|
||||
do
|
||||
{
|
||||
parent_sequence->Calls.push_back(Call());
|
||||
parent_sequence->Calls.back().Objects.push_back(say_command);
|
||||
parent_sequence->Calls.back().Objects.push_back(speaker);
|
||||
|
||||
std::string text;
|
||||
|
||||
if (nvl.peek() == '>')
|
||||
break;
|
||||
case Token_Type::ID:
|
||||
|
||||
// im not sure if the execution order here will be problematic, seems to work ok
|
||||
while (!( (c == '\n' && only_ws_before_next_newline(nvl)) || nvl.peek() == '>'))
|
||||
{
|
||||
nvl.get(c);
|
||||
text += c;
|
||||
break;
|
||||
case Token_Type::INT:
|
||||
|
||||
break;
|
||||
case Token_Type::FLOAT:
|
||||
|
||||
break;
|
||||
case Token_Type::BOOL:
|
||||
|
||||
break;
|
||||
case Token_Type::SEQUENCE_BEG:
|
||||
|
||||
break;
|
||||
case Token_Type::SEQUENCE_END:
|
||||
|
||||
break;
|
||||
case Token_Type::DIALOGUE_BEG:
|
||||
|
||||
break;
|
||||
case Token_Type::DIALOGUE_END:
|
||||
|
||||
break;
|
||||
case Token_Type::LIST_BEG:
|
||||
|
||||
break;
|
||||
case Token_Type::LIST_END:
|
||||
|
||||
break;
|
||||
case Token_Type::QUOTE:
|
||||
|
||||
break;
|
||||
case Token_Type::DIALOGUE:
|
||||
|
||||
break;
|
||||
default:
|
||||
return { ErrCode::SYNTAX_ERROR, current_line };
|
||||
}
|
||||
|
||||
while (isspace(text.back()))
|
||||
text.erase(text.size()-1, 1);
|
||||
}
|
||||
|
||||
for (size_t i = text.find('\n'); i != std::string::npos; i = text.find('\n'))
|
||||
{
|
||||
int e = 0;
|
||||
std::cout << text.size();
|
||||
while (isspace(text.at(i + e)))
|
||||
e++;
|
||||
|
||||
text.erase(i, e);
|
||||
text.insert(i, " ");
|
||||
}
|
||||
|
||||
parent_sequence->Calls.back().Objects.push_back(Object(text));
|
||||
skip_ws(nvl);
|
||||
} while (nvl.tellg() < end - static_cast<std::streampos>(1));
|
||||
|
||||
nvl.get(c); // get >
|
||||
|
||||
skip_ws(nvl);
|
||||
return;
|
||||
return { ErrCode::SUCCESS, current_line };
|
||||
}
|
||||
|
||||
template <bool is_parent_call, bool is_symbol>
|
||||
void parse_Object(Context parent_context, std::ifstream& nvl)
|
||||
{
|
||||
// chevron dialogue is now handled in a separate function
|
||||
Object this_object;
|
||||
Context this_context = parent_context;
|
||||
this_context.Scope_Hierarchy.push_back(&this_object);
|
||||
|
||||
std::variant<std::string, std::vector<Object>> content = ""; // init to empty str
|
||||
char c{};
|
||||
|
||||
// early exit for comments
|
||||
if (nvl.peek() == ';' && is_parent_call)
|
||||
{
|
||||
while (nvl.peek() != '\n')
|
||||
nvl.get(c);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (nvl.peek())
|
||||
{
|
||||
case '[': // List
|
||||
{
|
||||
nvl.get(c);
|
||||
std::streampos end = scope_cope('[', nvl);
|
||||
while (nvl.tellg() < end - static_cast<std::streampos>(1))
|
||||
{
|
||||
parse_Object<false, false>(this_context, nvl);
|
||||
skip_ws(nvl);
|
||||
}
|
||||
nvl.get(c); // skip ending scoper (']')
|
||||
break;
|
||||
}
|
||||
case '\"': // String
|
||||
{
|
||||
nvl.get(c);
|
||||
std::streampos end = scope_cope('\"', nvl);
|
||||
while (nvl.tellg() < end - static_cast<std::streampos>(1))
|
||||
{
|
||||
nvl.get(c);
|
||||
|
||||
// do not concat escaping '\' to content
|
||||
if (c == '\\' && (nvl.peek() == '\'' || nvl.peek() == '\"'))
|
||||
continue;
|
||||
|
||||
content = std::get<std::string>(content) + c;
|
||||
}
|
||||
nvl.get(c); // skip ending scoper
|
||||
|
||||
this_object.Value = std::get<std::string>(content);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
nvl.get(c);
|
||||
while (c != ' ' && c != '\n' && c != '}' && c != ']' && c != ',')
|
||||
{
|
||||
content = std::get<std::string>(content) + c;
|
||||
nvl.get(c);
|
||||
}
|
||||
|
||||
// ']' handled in next object parse, '}' handled in sequence parse, '\n' will be skipped somewhere with skip_ws(), where? idk
|
||||
if (c == ']' || c == '}' || c == '\n')
|
||||
{
|
||||
nvl.putback(c);
|
||||
}
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
// try number
|
||||
this_object.Value = std::stof(std::get<std::string>(content));
|
||||
}
|
||||
catch (std::exception)
|
||||
{
|
||||
if (std::get<std::string>(content) == "true")
|
||||
{
|
||||
this_object.Value = true;
|
||||
}
|
||||
else if (std::get<std::string>(content) == "false") {
|
||||
this_object.Value = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// early return if string is empty, this should only happen if calling from an object (not a call)
|
||||
if (str_test_every_char_not(&isspace, std::get<std::string>(content)))
|
||||
return;
|
||||
|
||||
// default case if content does not match keywords
|
||||
this_object.Value = std::get<std::string>(content);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
this_object.Is_Symbol = is_symbol;
|
||||
|
||||
if (is_parent_call)
|
||||
std::get<Call*>(parent_context.Scope_Hierarchy.back())->Objects.push_back(this_object);
|
||||
else if (std::get<Object*>(parent_context.Scope_Hierarchy.back())->Value.index() == 4) // 4 for list, indicates that parent is an object that is already a vector of objects
|
||||
std::get<std::vector<Object>>(
|
||||
std::get<Object*>(parent_context.Scope_Hierarchy.back())->Value
|
||||
).push_back(this_object);
|
||||
else // parent is not yet vector, initialize as (change from nil to) vector
|
||||
std::get<Object*>(parent_context.Scope_Hierarchy.back())->Value = std::vector<Object> { this_object };
|
||||
// the case for chevron dialogues are handled in the switch, there is an early return in the chevron case
|
||||
}
|
||||
|
||||
// Parses one call (a line of objects) and appends it to the parent sequence.
//
// parent_context - scope stack whose last entry is the enclosing Sequence.
// nvl            - stream positioned somewhere before the call.
//
// Nothing is appended when the sequence body turns out to be empty, or when a
// '<' is met: in that case parse_Dialogue takes over and pushes its own calls.
void parse_Call(Context parent_context, std::ifstream& nvl)
{
	Call pending_call;

	Context call_context = parent_context;
	call_context.Scope_Hierarchy.push_back(&pending_call);

	skip_ws(nvl);

	// a line that starts with a comment contributes nothing
	char scratch{};
	skip_comment(nvl, scratch);

	// empty sequence body
	if (nvl.peek() == '}')
		return;

	// objects run until the end of the line or the end of the sequence
	while (nvl.peek() != '\n' && nvl.peek() != '}')
	{
		skip_ws(nvl);

		if (nvl.peek() == '<')
		{
			// dialogue: abandon this call, parse_Dialogue pushes calls itself
			parse_Dialogue(call_context, nvl);
			return;
		}

		parse_Object<true, false>(call_context, nvl);
	}

	Sequence* owner = std::get<Sequence*>(parent_context.Scope_Hierarchy.back());
	owner->Calls.push_back(pending_call);
}
|
||||
|
||||
// Parses one named sequence of the form `name { calls... }` and appends it to
// the parent tree.
//
// parent_context - scope stack whose last entry is the enclosing Tree.
// nvl            - stream positioned before the sequence (leading whitespace
//                  and comments are skipped here).
//
// Throws std::runtime_error when the name is not followed by '{'.
void parse_Sequence(Context parent_context, std::ifstream& nvl)
{
	Sequence parsed_sequence;
	Context sequence_context = parent_context;
	sequence_context.Scope_Hierarchy.push_back(&parsed_sequence);

	skip_ws(nvl);

	char scratch{};
	skip_comment(nvl, scratch);

	parsed_sequence = Sequence(read_sequence_name(nvl));

	skip_ws(nvl);

	// the opening brace must come right after the name
	nvl.get(scratch);
	if (scratch != '{')
		throw std::runtime_error("Sequence parse failed at line " + std::to_string(query_ifstream_line_number(nvl)));

	// position just past the matching '}'
	const std::streampos body_end = scope_cope(scratch, nvl);

	while (nvl.tellg() < body_end - static_cast<std::streampos>(1))
	{
		parse_Call(sequence_context, nvl);
		skip_ws(nvl);
	}

	nvl.get(scratch); // consume the closing '}'

	Tree* owner = std::get<Tree*>(parent_context.Scope_Hierarchy.back());
	owner->Sequences.push_back(parsed_sequence);
}
|
||||
|
||||
// Opens the script at `path` and parses every sequence in it into `root`.
//
// root - tree that receives the parsed sequences.
// path - file to read; announced on std::cout before it is opened.
//
// Throws std::runtime_error when the file cannot be opened.
void parse_NVL(Tree& root, std::string path)
{
	std::ifstream nvl;
	std::cout << "Reading file " << path << "..." << std::endl;
	nvl.open(path);

	if (!nvl.is_open())
		throw std::runtime_error("Unable to read file " + path);

	Context root_context;
	root_context.Scope_Hierarchy.push_back(&root);

	skip_ws(nvl); // just in case the file is completely empty

	while (!nvl.eof())
	{
		// comments in front of a sequence are dealt with by parse_Sequence()
		parse_Sequence(root_context, nvl);
		skip_ws(nvl);

		char scratch{};
		skip_comment(nvl, scratch);

		skip_ws(nvl);
	}

	nvl.close();
}
|
||||
|
||||
|
||||
|
||||
void Object::Print(int indent)
|
||||
{
|
||||
indent_loop(indent);
|
||||
std::cout << "Object: ";
|
||||
|
||||
switch (Value.index())
|
||||
{
|
||||
case 0:
|
||||
std::cout << "Nil" << std::endl;
|
||||
break;
|
||||
case 1:
|
||||
std::cout << "Float: " << std::get<float>(Value) << std::endl;
|
||||
break;
|
||||
case 2:
|
||||
if (Is_Symbol)
|
||||
std::cout << "Symbol: " << std::get<std::string>(Value) << std::endl;
|
||||
else
|
||||
std::cout << "String: " << std::get<std::string>(Value) << std::endl;
|
||||
break;
|
||||
case 3:
|
||||
std::cout << "Bool: " << std::boolalpha << std::get<bool>(Value) << std::endl;
|
||||
break;
|
||||
case 4:
|
||||
std::cout << "List:" << std::endl;
|
||||
|
||||
for (auto& x : std::get<std::vector<Object>>(Value))
|
||||
{
|
||||
x.Print(indent + 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Call::Print(int indent)
|
||||
{
|
||||
indent_loop(indent);
|
||||
std::cout << "Call:" << std::endl;
|
||||
|
||||
for (auto& x : Objects)
|
||||
{
|
||||
x.Print(indent + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Sequence::Print(int indent)
|
||||
{
|
||||
indent_loop(indent);
|
||||
std::cout << "Sequence " << Name << ":" << std::endl;
|
||||
|
||||
for (auto& x : Calls)
|
||||
{
|
||||
x.Print(indent + 1);
|
||||
}
|
||||
|
||||
indent_loop(indent);
|
||||
std::cout << "-----------------------------------------------" << std::endl;
|
||||
}
|
||||
|
||||
void Tree::Print(int indent)
|
||||
{
|
||||
indent_loop(indent);
|
||||
std::cout << "Tree:" << std::endl;
|
||||
|
||||
for (auto& x : Sequences)
|
||||
{
|
||||
x.Print(indent + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
129
NouVeL/NVL.h
129
NouVeL/NVL.h
|
@ -1,73 +1,96 @@
|
|||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <variant>
|
||||
#include <deque>
|
||||
|
||||
|
||||
namespace NVL
|
||||
{
|
||||
struct Nil
|
||||
{
|
||||
namespace NVL {
|
||||
enum struct Char_Type {
|
||||
LOWERCASE,
|
||||
UPPERCASE,
|
||||
NUMERIC,
|
||||
DELIM_BEG,
|
||||
DELIM_END,
|
||||
RETURN,
|
||||
WHITESPACE,
|
||||
SEMICOLON,
|
||||
QUOTE,
|
||||
DOT,
|
||||
UNDERSCORE
|
||||
};
|
||||
|
||||
struct Object
|
||||
{
|
||||
std::variant<
|
||||
Nil,
|
||||
float,
|
||||
std::string,
|
||||
bool,
|
||||
std::vector<Object> // Implies Object can be an array of other Object
|
||||
> Value;
|
||||
|
||||
bool Is_Symbol = false;
|
||||
|
||||
void Print(int indent);
|
||||
|
||||
Object()
|
||||
{}
|
||||
|
||||
Object(std::string n) : Value(n)
|
||||
{}
|
||||
};
|
||||
|
||||
struct Call
|
||||
{
|
||||
std::vector<Object> Objects;
|
||||
void Print(int indent);
|
||||
enum struct ErrCode {
|
||||
SUCCESS,
|
||||
PARSE_ERROR,
|
||||
SYNTAX_ERROR
|
||||
};
|
||||
|
||||
struct Sequence
|
||||
{
|
||||
std::string Name;
|
||||
std::vector<Call> Calls;
|
||||
|
||||
Sequence()
|
||||
{}
|
||||
struct NVLError {
|
||||
ErrCode code;
|
||||
int line;
|
||||
|
||||
Sequence(std::string n) : Name(n)
|
||||
{}
|
||||
|
||||
void Print(int indent);
|
||||
};
|
||||
|
||||
struct Tree
|
||||
{
|
||||
std::vector<Sequence> Sequences;
|
||||
void Print(int indent);
|
||||
enum struct Token_Type {
|
||||
STANDBY, // Only used in the tokenizer, initial state
|
||||
WS,
|
||||
RETURN,
|
||||
ID,
|
||||
INT,
|
||||
FLOAT,
|
||||
BOOL,
|
||||
STRING,
|
||||
SEQUENCE_BEG,
|
||||
SEQUENCE_END,
|
||||
DIALOGUE_BEG,
|
||||
DIALOGUE_END,
|
||||
LIST_BEG,
|
||||
LIST_END,
|
||||
QUOTE,
|
||||
DIALOGUE, // Do not want to deal with characters in all languages separately, essentially just strings
|
||||
COMMENT
|
||||
};
|
||||
|
||||
struct Token {
|
||||
std::string value;
|
||||
Token_Type type;
|
||||
|
||||
Token(std::string str, Token_Type type) : value(str), type(type) {};
|
||||
Token(char c, Token_Type type) : value(std::string{ c }), type(type) {};
|
||||
Token(char c, Char_Type type);
|
||||
};
|
||||
|
||||
NVLError tokenize(std::string file, std::vector<Token>& tokens);
|
||||
|
||||
enum struct Node_Type {
|
||||
ROOT,
|
||||
SEQUENCE,
|
||||
CALL,
|
||||
LIST,
|
||||
ID,
|
||||
LITERAL
|
||||
};
|
||||
|
||||
struct Context {
|
||||
std::vector<std::variant<
|
||||
Tree*,
|
||||
Sequence*,
|
||||
Call*,
|
||||
Object*
|
||||
>> Scope_Hierarchy;
|
||||
std::deque<Node_Type> stack;
|
||||
};
|
||||
|
||||
void parse_NVL(Tree& root, std::string path);
|
||||
struct ASTNode {
|
||||
std::vector<ASTNode> children;
|
||||
Node_Type type;
|
||||
std::variant<
|
||||
std::string,
|
||||
int,
|
||||
float,
|
||||
bool
|
||||
> Value;
|
||||
};
|
||||
|
||||
NVLError construct_tree(std::vector<Token> tokens, ASTNode& root);
|
||||
|
||||
struct Tree {
|
||||
ASTNode root;
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,13 +1,20 @@
|
|||
#include "NVL.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
int main()
|
||||
{
|
||||
const std::string PJ_DIR = "E:\\Archive\\Projects\\NouVeL\\";
|
||||
|
||||
NVL::Tree tree;
|
||||
NVL::parse_NVL(tree, PJ_DIR + "test_j.nvl");
|
||||
tree.Print(0);
|
||||
std::ifstream fs(PJ_DIR + "test_j.nvl");
|
||||
std::string file((std::istreambuf_iterator<char>(fs)), std::istreambuf_iterator<char>());
|
||||
|
||||
std::vector<NVL::Token> tokens;
|
||||
|
||||
NVL::tokenize(file, tokens);
|
||||
|
||||
for (auto& c : tokens) {
|
||||
std::cout << c.value << ": " << (int) c.type << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
1
NouVeL/SymbolConfig.h
Normal file
1
NouVeL/SymbolConfig.h
Normal file
|
@ -0,0 +1 @@
|
|||
#pragma once
|
3
test.nvl
3
test.nvl
|
@ -1,3 +1,4 @@
|
|||
mmawesome {
|
||||
lmao [gkeposkge hee hee [2323.4 535 3434]]
|
||||
asda sdasd [asdad asdasd]
|
||||
"dsagjghj-=-=- dsfsd=<><\">()()d"
|
||||
}
|
||||
|
|
|
@ -1,8 +1,3 @@
|
|||
mmawesome {
|
||||
; testing chevron syntax
|
||||
"not mmaker" <
|
||||
I just realized that
|
||||
|
||||
I get this for free with scope coping
|
||||
>
|
||||
sadasd sadasd ["く" "ぇrち" "ゅいお" "p"]
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue