Nuked everything, REAL soulless lexer implemented

2021-07-07 18:03:41 -04:00 · 2021-07-07 18:03:41 -04:00 · e5759de673
commit e5759de673
parent ecad7b3817
7 changed files with 383 additions and 552 deletions
--- a/NouVeL/CMakeLists.txt
+++ b/NouVeL/CMakeLists.txt
@@ -4,6 +4,6 @@
 cmake_minimum_required (VERSION 3.8)

 # Add source to this project's executable.
-add_executable (NouVeL "NouVeL.cpp"  "NVL.cpp" "NVL.h")
+add_executable (NouVeL "NouVeL.cpp"  "NVL.cpp" "NVL.h" "SymbolConfig.h")

 # TODO: Add tests and install targets if needed.
--- a/NouVeL/NVL.cpp
+++ b/NouVeL/NVL.cpp
@@ -1,514 +1,318 @@
-#include <map>
 #include "NVL.h"
+#include "SymbolConfig.h"
+#include <unordered_map>

-namespace {
-	// general helpers, may move these into a different translation unit
-	bool str_test_every_char_not(int (*test)(int), std::string str) 
-	{
-		for (auto& x : str)
-		{
-			if (!(*test)(x))
-				return false;
+namespace NVL {
+	std::unordered_map<char, Char_Type> intialize_map() {
+		std::unordered_map<char, Char_Type> Char_Map;
+		for (char c : "abcdefghijklmnopqrstuvwxyz") {
+			Char_Map[c] = Char_Type::LOWERCASE;
 		}
-		return true;
+		for (char c : "ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
+			Char_Map[c] = Char_Type::UPPERCASE;
+		}
+		for (char c : "1234567890") {
+			Char_Map[c] = Char_Type::NUMERIC;
+		}
+		for (char c : "<{([") {
+			Char_Map[c] = Char_Type::DELIM_BEG;
+		}
+		for (char c : ">})]") {
+			Char_Map[c] = Char_Type::DELIM_END;
+		}
+		for (char c : " \t") {
+			Char_Map[c] = Char_Type::WHITESPACE;
+		}
+		for (char c : "\'\"") {
+			Char_Map[c] = Char_Type::QUOTE;
+		}
+		Char_Map['\n'] = Char_Type::RETURN;
+		Char_Map['.'] = Char_Type::DOT;
+		Char_Map['_'] = Char_Type::UNDERSCORE;
+		return Char_Map;
 	}
-	void indent_loop(int indent)
-	{
-		for (int i = 0; i < indent; i++)
-		{
-			std::cout << "\t";
+
+	const auto Char_Map = intialize_map();
+
+	Char_Type determine_type(char c) {
+			return Char_Map.at(c);
+	}
+
+	Char_Type determine_type_escaped(char c) {
+		if (Char_Map.find(c) != Char_Map.end())
+			return Char_Map.at(c);
+		else
+			return Char_Type::LOWERCASE;
+	}
+
+	Token::Token(char c, Char_Type type) {
+		value = std::string{ c };
+		switch (type) {
+		case Char_Type::DELIM_BEG:
+			switch (c) {
+			case '<':
+				this->type = Token_Type::DIALOGUE_BEG;
+				break;
+			case '{':
+				this->type = Token_Type::SEQUENCE_BEG;
+				break;
+//			case '(':
+//				break;
+			case '[':
+				this->type = Token_Type::LIST_BEG;
+				break;
+			default:
+				std::cout << "Failed to construct token: " << c << '\n'; // error, cannot do error codes in constructor..
+			}
+			break;
+		case Char_Type::DELIM_END:
+			switch (c) {
+			case '>':
+				this->type = Token_Type::DIALOGUE_END;
+				break;
+			case '}':
+				this->type = Token_Type::SEQUENCE_END;
+				break;
+//			case ')':
+//				break;
+			case ']':
+				this->type = Token_Type::LIST_END;
+				break;
+			default:
+				std::cout << "Failed to construct token: " << c << '\n'; // error
+			}
+			break;
+		default:
+			std::cout << "Failed to construct token: " << c << '\n'; // error
 		}
 	}
-	int query_ifstream_line_number(std::ifstream& stream) 
-	{
-		std::streampos pos = stream.tellg();
-		stream.seekg(0); // we should not be eof
-		int i = 0;
-		char c{};
-		while (stream.tellg() < pos) 
-		{
-			if (c == '\n')
-				i++;
-			stream.get(c);
-		}
-		return i;
-	}

-	// facilities for scopers
-	const std::map<char, char> SCOPER_MAP = {
-			{'{', '}' },	// CURLY
-			{'<', '>'},		// ANGLED 
-			{'(', ')'},		// BRACKET
-			{'[', ']'},		// SQUARE
-			
-			{'\'', '\''},	// SQUOTE
-			{'\"', '\"'}	// DQUOTE
-		};
+	NVLError tokenize(std::string file, std::vector<Token>& tokens) {
+		Token_Type state = Token_Type::STANDBY;

-	bool need_escape(char c) 
-	{
-		// todo: refactor, too cryptic
-		return (c == '\'' || c == '\"');
-	}
+		std::string current;

-	struct ScopeContext {
-		std::vector<char> Scope;
-	};
+		int current_line = 1;

-	std::streampos scope_cope(char scope_char, std::ifstream& stream) 
-	{
-		// important
-		// scope_cope() expects the scope opener to already have been fetched and stored in scope_char
-		// i.o.w. it wont be able to cope properly because there will be one extra level of scope that it cannot match
+		for (int i = 0; i < file.length(); i++ ) {
+			switch (state) {
+			case Token_Type::STANDBY:
+				current = "";

-		std::streampos initial = stream.tellg();
-		
-		std::streampos final;
-		char c{};
-		
-		ScopeContext current_scope;
-		current_scope.Scope.push_back(scope_char);
-
-		while (!current_scope.Scope.empty()) {
-			stream.get(c);
-
-			// push scope if opening scoper is found
-			bool set_scope_on_this_iter = false;
-			// there cannot be any more scope inside of quoted strings
-			if (current_scope.Scope.back() != '\'' && current_scope.Scope.back() != '\"') {
-				for (auto const& x : SCOPER_MAP)
-				{
-					if (c == x.first) {
-						current_scope.Scope.push_back(c); 
-						set_scope_on_this_iter = true;
-						break;
-					}
+				switch (determine_type(file[i])) {
+				case Char_Type::LOWERCASE:
+				case Char_Type::UPPERCASE:
+				case Char_Type::UNDERSCORE:
+					state = Token_Type::ID;
+					current += file[i];
+					break;
+				case Char_Type::NUMERIC:
+					state = Token_Type::INT;
+					current += file[i];
+					break;
+				case Char_Type::DELIM_BEG:
+					tokens.push_back(Token(file[i], Char_Type::DELIM_BEG));
+					break;
+				case Char_Type::DELIM_END:
+					tokens.push_back(Token(file[i], Char_Type::DELIM_END));
+					break;
+				case Char_Type::RETURN:
+					tokens.push_back(Token('\n', Token_Type::RETURN));
+					current_line++;
+					break;
+				case Char_Type::WHITESPACE:
+					break;
+				case Char_Type::SEMICOLON:
+					state = Token_Type::COMMENT;
+					break;
+				case Char_Type::QUOTE:
+					state = Token_Type::STRING;
+					tokens.push_back(Token(file[i], Token_Type::QUOTE));
+					break;
+				case Char_Type::DOT:
+					state = Token_Type::FLOAT;
+					current += file[i];
+					break;
 				}
-			}
-
-			// pop last scope if ending scoper is found
-			// set_scope_on_this_iter to prevent immediately popping scopers that have the same closers and openers
-			if (!set_scope_on_this_iter && c == SCOPER_MAP.at(current_scope.Scope.back()))
-				current_scope.Scope.pop_back();
-
-			if (c == '\\') // encounters an escaped sequence
-			{
-				char cc = stream.peek(); 
-				if (need_escape(cc)) // see if the escaped char happens to be a scoper
-				{
-					stream.get(c); // skip it, since whoever wrote the script escaped it
+				break;
+			case Token_Type::ID:
+				switch (determine_type(file[i])) {
+				case Char_Type::LOWERCASE:
+				case Char_Type::UPPERCASE:
+				case Char_Type::NUMERIC:
+				case Char_Type::UNDERSCORE:
+					current += file[i];
+					break;
+				case Char_Type::DELIM_BEG:
+					return { ErrCode::PARSE_ERROR, current_line };
+				case Char_Type::DELIM_END:
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token(file[i], Char_Type::DELIM_END));
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::RETURN:
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token('\n', Token_Type::RETURN));
+					current_line++;
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::WHITESPACE:
+					tokens.push_back(Token(current, state));
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::SEMICOLON:
+				case Char_Type::QUOTE:
+				case Char_Type::DOT:
+					return { ErrCode::PARSE_ERROR, current_line };
 				}
-			}
-
-			// cannot match all the scopes and has read to the end of the file
-			if (stream.eof())
-				throw std::runtime_error("Can't cope with scope at line " + std::to_string(query_ifstream_line_number(stream)));
-			
-		}
-
-		final = stream.tellg();
-		stream.seekg(initial);
-
-		return final;
-	}
-
-	// facilities for parsing
-	void skip_ws(std::ifstream& stream)
-	{
-		stream >> std::ws;
-	}
-
-	bool only_ws_before_next_newline(std::ifstream& stream) {
-		if (!isspace(stream.peek()))
-			return false;
-
-		std::streampos initial = stream.tellg();
-		char c{};
-		stream.get(c);
-		while (c != '\n') 
-		{
-			stream.get(c);
-			if (!isspace(c)) {
-				stream.seekg(initial);
-				return false;
+				break;
+			case Token_Type::INT:
+				switch (determine_type(file[i])) {
+				case Char_Type::LOWERCASE:
+				case Char_Type::UPPERCASE:
+				case Char_Type::UNDERSCORE:
+				case Char_Type::DELIM_BEG:
+				case Char_Type::SEMICOLON:
+				case Char_Type::QUOTE:
+					return { ErrCode::PARSE_ERROR, current_line };
+				case Char_Type::NUMERIC:
+					current += file[i];
+					break;
+				case Char_Type::DELIM_END:
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token(file[i], Char_Type::DELIM_END));
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::RETURN:
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token('\n', Token_Type::RETURN));
+					current_line++;
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::WHITESPACE:
+					tokens.push_back(Token(current, state));
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::DOT:
+					state = Token_Type::FLOAT;
+					current += file[i];
+					break;
+				}
+				break;
+			case Token_Type::FLOAT:
+				switch (determine_type(file[i])) {
+				case Char_Type::LOWERCASE:
+				case Char_Type::UPPERCASE:
+				case Char_Type::UNDERSCORE:
+				case Char_Type::SEMICOLON:
+				case Char_Type::QUOTE:
+				case Char_Type::DOT:
+					return { ErrCode::PARSE_ERROR, current_line };
+				case Char_Type::NUMERIC:
+					current += file[i];
+					break;
+				case Char_Type::DELIM_BEG:
+					return { ErrCode::PARSE_ERROR, current_line };
+				case Char_Type::DELIM_END:
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token(file[i], Char_Type::DELIM_END));
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::RETURN:
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token('\n', Token_Type::RETURN));
+					current_line++;
+					state = Token_Type::STANDBY;
+					break;
+				case Char_Type::WHITESPACE:
+					tokens.push_back(Token(current, state));
+					state = Token_Type::STANDBY;
+					break;
+				}
+				break;
+			case Token_Type::STRING:
+				if (determine_type_escaped(file[i]) == Char_Type::RETURN)
+					return { ErrCode::PARSE_ERROR, current_line };
+				if (determine_type_escaped(file[i]) == Char_Type::QUOTE && current.back() != '\\') {
+					tokens.push_back(Token(current, state));
+					tokens.push_back(Token('\"', Token_Type::QUOTE));
+					state = Token_Type::STANDBY;
+				}
+				else {
+					current += file[i];
+				}
+				break;
+			case Token_Type::DIALOGUE:
+				// if (determine_type(file[i]) == Char_Type::)
+					;
+				break;
+			case Token_Type::COMMENT:
+				if (determine_type_escaped(file[i]) == Char_Type::RETURN) {
+					tokens.push_back(Token('\n', Token_Type::RETURN));
+					current_line++;
+				}
+				state = Token_Type::STANDBY;
+				break;
+			default:
+				return { ErrCode::PARSE_ERROR, current_line };
 			}
 		}
-		
-		stream.seekg(initial);
-		return true;
+		return { ErrCode::SUCCESS, -1 };
 	}
 	
-	std::string read_sequence_name(std::ifstream& nvl) 
-	{
-		// this function will move the reading head of nvl, this is hard to keep track of but simplifies the program
-		skip_ws(nvl);
-		std::string token;
-		char c{};
+	NVLError construct_tree(std::vector<Token> tokens, ASTNode& root) {
+		Context root_context;
+		ASTNode* state = &root;
+		int current_line = 0;

-		nvl.get(c);
-		while (c != ' ' && c != '{' && c != '\n')
-		{
-			token += c;
-			nvl.get(c);
-		}
-		nvl.putback(c);
+		for (int i = 0; i < tokens.size(); i++) {
+			switch (tokens[i].type) {
+			case Token_Type::RETURN:

-		return token;
-	}
-
-
-	void skip_comment(std::ifstream& nvl, char& c)
-	{
-		if (nvl.peek() == ';')
-		{
-			nvl.get(c);
-			while (c != '\n')
-			{
-				nvl.get(c);
-			}
-
-			skip_ws(nvl);
-
-			skip_comment(nvl, c);
-
-			skip_ws(nvl);
-		}
-	}
-}
-
-namespace NVL
-{
-	void parse_Dialogue(Context old_context, std::ifstream& nvl)
-	{
-		if (std::get<Call*>(old_context.Scope_Hierarchy.back())->Objects.size() != 1)
-			throw std::runtime_error("Failed to parse dialogue at line " + std::to_string(query_ifstream_line_number(nvl)));
-
-		const Object say_command = Object("Say");
-
-		Object speaker = std::get<Call*>(old_context.Scope_Hierarchy.back())->Objects.front(); // Copy speaker object
-
-		Sequence* parent_sequence = std::get<Sequence*>(old_context.Scope_Hierarchy.rbegin()[1]);
-		// old call has never been pushed, we need to exit parse_Call right away when we finish parsing the dialogue, because this already deals with calls
-
-		char c{};
-		nvl.get(c); // get <
-		std::streampos end = scope_cope('<', nvl);
-
-		skip_ws(nvl);
-
-		do
-		{
-			parent_sequence->Calls.push_back(Call());
-			parent_sequence->Calls.back().Objects.push_back(say_command);
-			parent_sequence->Calls.back().Objects.push_back(speaker);
-
-			std::string text;
-
-			if (nvl.peek() == '>')
 				break;
+			case Token_Type::ID:

-			// im not sure if the execution order here will be problematic, seems to work ok
-			while (!( (c == '\n' && only_ws_before_next_newline(nvl)) || nvl.peek() == '>'))
-			{
-				nvl.get(c);
-				text += c;
+				break;
+			case Token_Type::INT:
+
+				break;
+			case Token_Type::FLOAT:
+
+				break;
+			case Token_Type::BOOL:
+
+				break;
+			case Token_Type::SEQUENCE_BEG:
+
+				break;
+			case Token_Type::SEQUENCE_END:
+
+				break;
+			case Token_Type::DIALOGUE_BEG:
+
+				break;
+			case Token_Type::DIALOGUE_END:
+
+				break;
+			case Token_Type::LIST_BEG:
+
+				break;
+			case Token_Type::LIST_END:
+
+				break;
+			case Token_Type::QUOTE:
+
+				break;
+			case Token_Type::DIALOGUE:
+
+				break;
+			default:
+				return { ErrCode::SYNTAX_ERROR, current_line };
 			}

-			while (isspace(text.back()))
-				text.erase(text.size()-1, 1);
+		}

-			for (size_t i = text.find('\n'); i != std::string::npos; i = text.find('\n')) 
-			{
-				int e = 0;
-				std::cout << text.size();
-				while (isspace(text.at(i + e))) 
-					e++;
-
-				text.erase(i, e);
-				text.insert(i, " ");
-			}
-
-			parent_sequence->Calls.back().Objects.push_back(Object(text));
-			skip_ws(nvl);
-		} while (nvl.tellg() < end - static_cast<std::streampos>(1));
-
-		nvl.get(c); // get >
-
-		skip_ws(nvl);
-		return;
+		return { ErrCode::SUCCESS, current_line };
 	}

-	template <bool is_parent_call, bool is_symbol>
-	void parse_Object(Context parent_context, std::ifstream& nvl)
-	{
-		// chevron dialogue is now handled in a separate function
-		Object this_object;
-		Context this_context = parent_context;
-		this_context.Scope_Hierarchy.push_back(&this_object);
-
-		std::variant<std::string, std::vector<Object>> content = ""; // init to empty str
-		char c{};		
-
-		// early exit for comments
-		if (nvl.peek() == ';' && is_parent_call)
-		{
-			while (nvl.peek() != '\n')
-				nvl.get(c);
-			return;
-		}
-
-		switch (nvl.peek())
-		{
-		case '[': // List
-		{
-			nvl.get(c);
-			std::streampos end = scope_cope('[', nvl);
-			while (nvl.tellg() < end - static_cast<std::streampos>(1))
-			{
-				parse_Object<false, false>(this_context, nvl);
-				skip_ws(nvl);
-			}
-			nvl.get(c); // skip ending scoper (']')
-			break;
-		}
-		case '\"': // String
-		{
-			nvl.get(c);
-			std::streampos end = scope_cope('\"', nvl);
-			while (nvl.tellg() < end - static_cast<std::streampos>(1))
-			{
-				nvl.get(c);
-
-				// do not concat escaping '\' to content
-				if (c == '\\' && (nvl.peek() == '\'' || nvl.peek() == '\"'))
-					continue;
-
-				content = std::get<std::string>(content) + c;
-			}
-			nvl.get(c); // skip ending scoper
-
-			this_object.Value = std::get<std::string>(content);
-			break;
-		}
-		default:
-		{
-			nvl.get(c);
-			while (c != ' ' && c != '\n' && c != '}' && c != ']' && c != ',')
-			{
-				content = std::get<std::string>(content) + c;
-				nvl.get(c);
-			}
-
-			// ']' handled in next object parse, '}' handled in sequence parse, '\n' will be skipped somewhere with skip_ws(), where? idk
-			if (c == ']' || c == '}' || c == '\n')
-			{
-				nvl.putback(c);
-			}
-
-
-			try
-			{
-				// try number
-				this_object.Value = std::stof(std::get<std::string>(content));
-			}
-			catch (std::exception)
-			{
-				if (std::get<std::string>(content) == "true")
-				{
-					this_object.Value = true;
-				}
-				else if (std::get<std::string>(content) == "false") {
-					this_object.Value = false;
-				}
-				else
-				{
-					// early return if string is empty, this should only happen if calling from an object (not a call)
-					if (str_test_every_char_not(&isspace, std::get<std::string>(content)))
-						return;
-
-					// default case if content does not match keywords
-					this_object.Value = std::get<std::string>(content);
-				}
-			}
-			break;
-		}
-		}
-
-
-		this_object.Is_Symbol = is_symbol;
-
-		if (is_parent_call)
-			std::get<Call*>(parent_context.Scope_Hierarchy.back())->Objects.push_back(this_object);
-		else if (std::get<Object*>(parent_context.Scope_Hierarchy.back())->Value.index() == 4) // 4 for list, indicates that parent is an object that is already a vector of objects
-			std::get<std::vector<Object>>(
-				std::get<Object*>(parent_context.Scope_Hierarchy.back())->Value
-				).push_back(this_object);
-		else // parent is not yet vector, initialize as (change from nil to) vector
-			std::get<Object*>(parent_context.Scope_Hierarchy.back())->Value = std::vector<Object> { this_object };
-		// the case for chevron dialogues are handled in the switch, there is an early return in the chevron case
-	}
-
-	void parse_Call(Context parent_context, std::ifstream& nvl)
-	{
-		Call this_call;
-
-		Context this_context = parent_context;
-		this_context.Scope_Hierarchy.push_back(&this_call);
-
-		skip_ws(nvl);
-
-		// do not push anything if line starts with a comment
-		char c{};
-		skip_comment(nvl, c);
-		// early exit if sequence is empty
-		if (nvl.peek() == '}')
-			return;
-
-		while (!(nvl.peek() == '\n' || nvl.peek() == '}'))
-		{
-			skip_ws(nvl);
-			if (nvl.peek() == '<') // dialogue! abandon current call, parse_Dialogue deals with pushing calls
-			{
-				parse_Dialogue(this_context, nvl);
-				return;
-			}
-
-			parse_Object<true, false>(this_context, nvl);
-		}
-
-		std::get<Sequence*>(parent_context.Scope_Hierarchy.back())->Calls.push_back(this_call);
-	}
-
-	void parse_Sequence(Context parent_context, std::ifstream& nvl)
-	{
-		Sequence this_sequence;
-		Context this_context = parent_context;
-		this_context.Scope_Hierarchy.push_back(&this_sequence);
-
-		skip_ws(nvl);
-
-		char c{};
-		skip_comment(nvl, c);
-
-		this_sequence = Sequence(read_sequence_name(nvl));
-
-		skip_ws(nvl);
-
-		nvl.get(c); // get {
-
-		if (c != '{')
-			throw std::runtime_error("Sequence parse failed at line " + std::to_string(query_ifstream_line_number(nvl)));
-
-		std::streampos end_pos = scope_cope(c, nvl);
-
-		while (nvl.tellg() < end_pos - static_cast<std::streampos>(1))
-		{
-			parse_Call(this_context, nvl);
-			skip_ws(nvl);
-		}
-
-		nvl.get(c); // get }
-
-		std::get<Tree*>(parent_context.Scope_Hierarchy.back())->Sequences.push_back(this_sequence);
-	}
-
-	void parse_NVL(Tree& root, std::string path)
-	{
-		std::ifstream nvl;
-		std::cout << "Reading file " << path << "..." << std::endl;
-		nvl.open(path);
-
-		if (nvl.is_open()) {
-			Context current_context;
-			current_context.Scope_Hierarchy.push_back(&root);
-
-			skip_ws(nvl); // just in case the file is completely empty
-
-			while (!nvl.eof()) {
-				// parse_Sequence() already takes care of comments before a sequence
-				parse_Sequence(current_context, nvl);
-				skip_ws(nvl);
-
-				char c{};
-				skip_comment(nvl, c);
-
-				skip_ws(nvl);
-			}
-
-			nvl.close();
-		} else
-			throw std::runtime_error("Unable to read file " + path);
-
-	}
-
-
-
-	void Object::Print(int indent)
-	{
-		indent_loop(indent);
-		std::cout << "Object: ";
-
-		switch (Value.index())
-		{
-		case 0:
-			std::cout << "Nil" << std::endl;
-			break;
-		case 1:
-			std::cout << "Float: " << std::get<float>(Value) << std::endl;
-			break;
-		case 2:
-			if (Is_Symbol)
-				std::cout << "Symbol: " << std::get<std::string>(Value) << std::endl;
-			else
-				std::cout << "String: " << std::get<std::string>(Value) << std::endl;
-			break;
-		case 3:
-			std::cout << "Bool: " << std::boolalpha << std::get<bool>(Value) << std::endl;
-			break;
-		case 4:
-			std::cout << "List:" << std::endl;
-
-			for (auto& x : std::get<std::vector<Object>>(Value))
-			{
-				x.Print(indent + 1);
-			} 
-			break;
-		}
-	}
-
-	void Call::Print(int indent)
-	{
-		indent_loop(indent);
-		std::cout << "Call:" << std::endl;
-
-		for (auto& x : Objects)
-		{
-			x.Print(indent + 1);
-		}
-	}
-
-	void Sequence::Print(int indent)
-	{
-		indent_loop(indent);
-		std::cout << "Sequence " << Name << ":" << std::endl;
-
-		for (auto& x : Calls)
-		{
-			x.Print(indent + 1);
-		}
-
-		indent_loop(indent);
-		std::cout << "-----------------------------------------------" << std::endl;
-	}
-
-	void Tree::Print(int indent)
-	{
-		indent_loop(indent);
-		std::cout << "Tree:" << std::endl;
-
-		for (auto& x : Sequences) 
-		{
-			x.Print(indent + 1);
-		}
-	}
 }
--- a/NouVeL/NVL.h
+++ b/NouVeL/NVL.h
@@ -1,73 +1,96 @@
 #pragma once

 #include <iostream>
-#include <fstream>
 #include <string>
 #include <vector>
 #include <variant>
+#include <deque>

-
-namespace NVL 
-{
-	struct Nil 
-	{
+namespace NVL {
+	enum struct Char_Type {
+		LOWERCASE,
+		UPPERCASE,
+		NUMERIC,
+		DELIM_BEG,
+		DELIM_END,
+		RETURN,
+		WHITESPACE,
+		SEMICOLON,
+		QUOTE,
+		DOT,
+		UNDERSCORE
 	};

-	struct Object 
-	{
-		std::variant<
-			Nil,
-			float,
-			std::string,
-			bool,
-			std::vector<Object> // Implies Object can be an array of other Object
-		> Value;
-
-		bool Is_Symbol = false;
-
-		void Print(int indent);
-
-		Object() 
-		{}
-
-		Object(std::string n) : Value(n)
-		{}
-	};
-	
-	struct Call 
-	{
-		std::vector<Object> Objects;
-		void Print(int indent);
+	enum struct ErrCode {
+		SUCCESS,
+		PARSE_ERROR,
+		SYNTAX_ERROR
 	};

-	struct Sequence 
-	{
-		std::string Name;
-		std::vector<Call> Calls;
-		
-		Sequence() 
-		{}
+	struct NVLError {
+		ErrCode code;
+		int line;

-		Sequence(std::string n) : Name(n) 
-		{}
-
-		void Print(int indent);
 	};

-	struct Tree 
-	{
-		std::vector<Sequence> Sequences;
-		void Print(int indent);
+	enum struct Token_Type {
+		STANDBY,	// Only used in the tokenizer, initial state
+		WS,
+		RETURN,
+		ID,
+		INT,
+		FLOAT,
+		BOOL,
+		STRING,
+		SEQUENCE_BEG,
+		SEQUENCE_END,
+		DIALOGUE_BEG,
+		DIALOGUE_END,
+		LIST_BEG,
+		LIST_END,
+		QUOTE,
+		DIALOGUE,	// Do not want to deal with characters in all languages separately, essentially just strings
+		COMMENT
+	};
+
+	struct Token {
+		std::string value;
+		Token_Type type;
+
+		Token(std::string str, Token_Type type) : value(str), type(type) {};
+		Token(char c, Token_Type type) : value(std::string{ c }), type(type) {};
+		Token(char c, Char_Type type);
+	};
+
+	NVLError tokenize(std::string file, std::vector<Token>& tokens);
+
+	enum struct Node_Type {
+		ROOT,
+		SEQUENCE,
+		CALL,
+		LIST,
+		ID,
+		LITERAL
 	};

 	struct Context {
-		std::vector<std::variant<
-			Tree*,
-			Sequence*,
-			Call*,
-			Object*
-			>> Scope_Hierarchy;
+		std::deque<Node_Type> stack;
 	};

-	void parse_NVL(Tree& root, std::string path);
+	struct ASTNode {
+		std::vector<ASTNode> children;
+		Node_Type type;
+		std::variant<
+			std::string,
+			int,
+			float,
+			bool
+		> Value;
+	};
+
+	NVLError construct_tree(std::vector<Token> tokens, ASTNode& root);
+
+	struct Tree {
+		ASTNode root;
+	};
 }
--- a/NouVeL/NouVeL.cpp
+++ b/NouVeL/NouVeL.cpp
@@ -1,13 +1,20 @@
 #include "NVL.h"
-
+#include <fstream>

 int main()
 {
 	const std::string PJ_DIR = "E:\\Archive\\Projects\\NouVeL\\";
 	
-	NVL::Tree tree;
-	NVL::parse_NVL(tree, PJ_DIR + "test_j.nvl");
-	tree.Print(0);
+	std::ifstream fs(PJ_DIR + "test_j.nvl");
+	std::string file((std::istreambuf_iterator<char>(fs)), std::istreambuf_iterator<char>());
+
+	std::vector<NVL::Token> tokens;
+
+	NVL::tokenize(file, tokens);
+
+	for (auto& c : tokens) {
+		std::cout << c.value << ": " << (int) c.type << "\n";
+	}

 	return 0;
 }
--- a/NouVeL/SymbolConfig.h
+++ b/NouVeL/SymbolConfig.h
@@ -0,0 +1 @@
+#pragma once
--- a/test.nvl
+++ b/test.nvl
@@ -1,3 +1,4 @@
 mmawesome {
-	lmao [gkeposkge hee hee [2323.4 535 3434]]
+	asda sdasd [asdad asdasd]
+	"dsagjghj-=-=- dsfsd=<><\">()()d"
 }
--- a/test_j.nvl
+++ b/test_j.nvl
@@ -1,8 +1,3 @@
 mmawesome {
-	; testing chevron syntax
-	"not mmaker" <
-		I just realized that
-
-		I get this for free with scope coping
-	>
+	sadasd sadasd ["く" "ぇｒち" "ゅいお" "ｐ"]
 }