From f7cddaf6dcaafac6fe8244ba615ff35d901e7a46 Mon Sep 17 00:00:00 2001 From: lachrymaL Date: Fri, 16 Jul 2021 20:27:25 -0400 Subject: [PATCH] tried to parse --- NouVeL/NVL.cpp | 166 +++++++++++++++++++++++++--------------------- NouVeL/NVL.h | 31 +++++---- NouVeL/NouVeL.cpp | 2 +- test_utf8.nvl | 2 +- 4 files changed, 108 insertions(+), 93 deletions(-) diff --git a/NouVeL/NVL.cpp b/NouVeL/NVL.cpp index 8cbb91d..3d51746 100644 --- a/NouVeL/NVL.cpp +++ b/NouVeL/NVL.cpp @@ -45,8 +45,9 @@ namespace NVL { return Char_Type::LOWERCASE; } - Token::Token(char c, Char_Type type) { + Token::Token(char c, Char_Type type, int line) { value = std::string{ c }; + this->line = line; switch (type) { case Char_Type::DELIM_BEG: switch (c) { @@ -56,8 +57,9 @@ namespace NVL { case '{': this->type = Token_Type::SEQUENCE_BEG; break; -// case '(': -// break; + case '(': + this->type = Token_Type::BRACKET_BEG; + break; case '[': this->type = Token_Type::LIST_BEG; break; @@ -73,8 +75,9 @@ namespace NVL { case '}': this->type = Token_Type::SEQUENCE_END; break; -// case ')': -// break; + case ')': + this->type = Token_Type::BRACKET_END; + break; case ']': this->type = Token_Type::LIST_END; break; @@ -111,13 +114,13 @@ namespace NVL { current += file[i]; break; case Char_Type::DELIM_BEG: - tokens.push_back(Token(file[i], Char_Type::DELIM_BEG)); + tokens.push_back(Token(file[i], Char_Type::DELIM_BEG, current_line)); break; case Char_Type::DELIM_END: - tokens.push_back(Token(file[i], Char_Type::DELIM_END)); + tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line)); break; case Char_Type::RETURN: - tokens.push_back(Token('\n', Token_Type::RETURN)); + tokens.push_back(Token('\n', Token_Type::RETURN, current_line)); current_line++; break; case Char_Type::WHITESPACE: @@ -127,7 +130,7 @@ namespace NVL { break; case Char_Type::QUOTE: state = Token_Type::STRING; - tokens.push_back(Token(file[i], Token_Type::QUOTE)); + tokens.push_back(Token(file[i], Token_Type::QUOTE, current_line)); break; case Char_Type::DOT: state = Token_Type::FLOAT; @@ -146,18 +149,18 @@ namespace NVL { case Char_Type::DELIM_BEG: return { ErrCode::PARSE_ERROR, current_line }; case Char_Type::DELIM_END: - tokens.push_back(Token(current, state)); - tokens.push_back(Token(file[i], Char_Type::DELIM_END)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line)); state = Token_Type::STANDBY; break; case Char_Type::RETURN: - tokens.push_back(Token(current, state)); - tokens.push_back(Token('\n', Token_Type::RETURN)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token('\n', Token_Type::RETURN, current_line)); current_line++; state = Token_Type::STANDBY; break; case Char_Type::WHITESPACE: - tokens.push_back(Token(current, state)); + tokens.push_back(Token(current, state, current_line)); state = Token_Type::STANDBY; break; case Char_Type::SEMICOLON: @@ -179,18 +182,18 @@ namespace NVL { current += file[i]; break; case Char_Type::DELIM_END: - tokens.push_back(Token(current, state)); - tokens.push_back(Token(file[i], Char_Type::DELIM_END)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line)); state = Token_Type::STANDBY; break; case Char_Type::RETURN: - tokens.push_back(Token(current, state)); - tokens.push_back(Token('\n', Token_Type::RETURN)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token('\n', Token_Type::RETURN, current_line)); current_line++; state = Token_Type::STANDBY; break; case Char_Type::WHITESPACE: - tokens.push_back(Token(current, state)); + tokens.push_back(Token(current, state, current_line)); state = Token_Type::STANDBY; break; case Char_Type::DOT: @@ -214,18 +217,18 @@ namespace NVL { case Char_Type::DELIM_BEG: return { ErrCode::PARSE_ERROR, current_line }; case Char_Type::DELIM_END: - tokens.push_back(Token(current, state)); - tokens.push_back(Token(file[i], Char_Type::DELIM_END)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line)); state = Token_Type::STANDBY; break; case Char_Type::RETURN: - tokens.push_back(Token(current, state)); - tokens.push_back(Token('\n', Token_Type::RETURN)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token('\n', Token_Type::RETURN, current_line)); current_line++; state = Token_Type::STANDBY; break; case Char_Type::WHITESPACE: - tokens.push_back(Token(current, state)); + tokens.push_back(Token(current, state, current_line)); state = Token_Type::STANDBY; break; } @@ -234,8 +237,8 @@ namespace NVL { if (determine_type_escaped(file[i]) == Char_Type::RETURN) return { ErrCode::PARSE_ERROR, current_line }; if (determine_type_escaped(file[i]) == Char_Type::QUOTE && current.back() != '\\') { - tokens.push_back(Token(current, state)); - tokens.push_back(Token('\"', Token_Type::QUOTE)); + tokens.push_back(Token(current, state, current_line)); + tokens.push_back(Token('\"', Token_Type::QUOTE, current_line)); state = Token_Type::STANDBY; } else { @@ -248,7 +251,7 @@ namespace NVL { break; case Token_Type::COMMENT: if (determine_type_escaped(file[i]) == Char_Type::RETURN) { - tokens.push_back(Token('\n', Token_Type::RETURN)); + tokens.push_back(Token('\n', Token_Type::RETURN, current_line)); current_line++; } state = Token_Type::STANDBY; @@ -260,59 +263,68 @@ namespace NVL { return { ErrCode::SUCCESS, -1 }; } - NVLError construct_tree(std::vector tokens, ASTNode& root) { - Context root_context; - ASTNode* state = &root; - int current_line = 0; - - for (int i = 0; i < tokens.size(); i++) { - switch (tokens[i].type) { - case Token_Type::RETURN: - - break; - case Token_Type::ID: - - break; - case Token_Type::INT: - - break; - case Token_Type::FLOAT: - - break; - case Token_Type::BOOL: - - break; - case Token_Type::SEQUENCE_BEG: - - break; - case Token_Type::SEQUENCE_END: - - break; - case Token_Type::DIALOGUE_BEG: - - break; - case Token_Type::DIALOGUE_END: - - break; - case Token_Type::LIST_BEG: - - break; - case Token_Type::LIST_END: - - break; - case Token_Type::QUOTE: - - break; - case Token_Type::DIALOGUE: - - break; - default: - return { ErrCode::SYNTAX_ERROR, current_line }; - } + namespace Parse { + NVLError Symbol(Token** token, Node& root) { + NVLError status = { ErrCode::PARSE_ERROR, (*token)->line }; + return status; } - return { ErrCode::SUCCESS, current_line }; + NVLError Dialogue(Token** token, Node& root) { + NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line }; + + return status; + } + + NVLError Line(Token** token, Node& root) { + NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line }; + + while ((*token)->type != Token_Type::RETURN) { + if ((*token)->type == Token_Type::LIST_END) { + (*token)++; + break; + } + + if ((*token)->type == Token_Type::LIST_BEG) { + (*token)++; + status = Parse::Line(token, root); + } + else if ((*token)->type == Token_Type::DIALOGUE_BEG) { + (*token)++; + status = Parse::Dialogue(token, root); + } + else { + status = Parse::Symbol(token, root); + } + } + + return status; + } + + NVLError SequeneceName(Token** token, Node& root) { + NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line }; + + return status; + } + + NVLError Sequenece(Token** token, Node& root) { + NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line }; + + SequeneceName(token, root); + + while ((*token)->type != Token_Type::SEQUENCE_END) + status = Parse::Line(token, root); + + (*token)++; + return status; + } } + NVLError append_tree(std::vector tokens, Node& tree) { + NVLError status; + for (Token* i = &(tokens.front()); i < &(tokens.back()); i++) + status = Parse::Sequenece(&i, tree); + + return status; + } } diff --git a/NouVeL/NVL.h b/NouVeL/NVL.h index 44f2429..a30ee6b 100644 --- a/NouVeL/NVL.h +++ b/NouVeL/NVL.h @@ -24,7 +24,8 @@ namespace NVL { enum struct ErrCode { SUCCESS, PARSE_ERROR, - SYNTAX_ERROR + SYNTAX_ERROR, + EVAL_ERROR }; struct NVLError { @@ -48,6 +49,8 @@ namespace NVL { DIALOGUE_END, LIST_BEG, LIST_END, + BRACKET_BEG, + BRACKET_END, QUOTE, DIALOGUE, // Do not want to deal with characters in all languages separately, essentially just strings COMMENT @@ -56,10 +59,11 @@ namespace NVL { struct Token { std::string value; Token_Type type; + int line; - Token(std::string str, Token_Type type) : value(str), type(type) {}; - Token(char c, Token_Type type) : value(std::string{ c }), type(type) {}; - Token(char c, Char_Type type); + Token(std::string str, Token_Type type, int line) : value(str), type(type), line(line) {}; + Token(char c, Token_Type type, int line) : value(std::string{ c }), type(type), line(line) {}; + Token(char c, Char_Type type, int line); }; NVLError tokenize(std::string file, std::vector& tokens); @@ -73,12 +77,8 @@ namespace NVL { LITERAL }; - struct Context { - std::deque stack; - }; - - struct ASTNode { - std::vector children; + struct Node { + std::vector children; Node_Type type; std::variant< std::string, @@ -88,9 +88,12 @@ namespace NVL { > Value; }; - NVLError construct_tree(std::vector tokens, ASTNode& root); + NVLError append_tree(std::vector tokens, Node& tree); - struct Tree { - ASTNode root; - }; + namespace Parse { + NVLError Sequenece(Token** token, Node& root); + NVLError Line(Token** token, Node& root); + NVLError Dialogue(Token** token, Node& root); + NVLError Symbol(Token** token, Node& root); + } } diff --git a/NouVeL/NouVeL.cpp b/NouVeL/NouVeL.cpp index a2f5519..f48655c 100644 --- a/NouVeL/NouVeL.cpp +++ b/NouVeL/NouVeL.cpp @@ -5,7 +5,7 @@ int main() { const std::string PJ_DIR = "E:\\Archive\\Projects\\NouVeL\\"; - std::ifstream fs(PJ_DIR + "test_j.nvl"); + std::ifstream fs(PJ_DIR + "test_utf8.nvl"); std::string file((std::istreambuf_iterator(fs)), std::istreambuf_iterator()); std::vector tokens; diff --git a/test_utf8.nvl b/test_utf8.nvl index 08335db..5e11ea0 100644 --- a/test_utf8.nvl +++ b/test_utf8.nvl @@ -1,3 +1,3 @@ mmawesome { - sadasd sadasd ["く" "ぇrち" "ゅいお" "p"] + sadasd sadasd ["く\"" "ぇrち"[][[[[]]]][][][] "ゅいお" "p"] }