tried to parse

This commit is contained in:
lachrymaL 2021-07-16 20:27:25 -04:00
parent 530803b721
commit f7cddaf6dc
No known key found for this signature in database
GPG key ID: F3640ACFA174B1C1
4 changed files with 108 additions and 93 deletions

View file

@ -45,8 +45,9 @@ namespace NVL {
return Char_Type::LOWERCASE; return Char_Type::LOWERCASE;
} }
Token::Token(char c, Char_Type type) { Token::Token(char c, Char_Type type, int line) {
value = std::string{ c }; value = std::string{ c };
this->line = line;
switch (type) { switch (type) {
case Char_Type::DELIM_BEG: case Char_Type::DELIM_BEG:
switch (c) { switch (c) {
@ -56,8 +57,9 @@ namespace NVL {
case '{': case '{':
this->type = Token_Type::SEQUENCE_BEG; this->type = Token_Type::SEQUENCE_BEG;
break; break;
// case '(': case '(':
// break; this->type = Token_Type::BRACKET_BEG;
break;
case '[': case '[':
this->type = Token_Type::LIST_BEG; this->type = Token_Type::LIST_BEG;
break; break;
@ -73,8 +75,9 @@ namespace NVL {
case '}': case '}':
this->type = Token_Type::SEQUENCE_END; this->type = Token_Type::SEQUENCE_END;
break; break;
// case ')': case ')':
// break; this->type = Token_Type::BRACKET_END;
break;
case ']': case ']':
this->type = Token_Type::LIST_END; this->type = Token_Type::LIST_END;
break; break;
@ -111,13 +114,13 @@ namespace NVL {
current += file[i]; current += file[i];
break; break;
case Char_Type::DELIM_BEG: case Char_Type::DELIM_BEG:
tokens.push_back(Token(file[i], Char_Type::DELIM_BEG)); tokens.push_back(Token(file[i], Char_Type::DELIM_BEG, current_line));
break; break;
case Char_Type::DELIM_END: case Char_Type::DELIM_END:
tokens.push_back(Token(file[i], Char_Type::DELIM_END)); tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line));
break; break;
case Char_Type::RETURN: case Char_Type::RETURN:
tokens.push_back(Token('\n', Token_Type::RETURN)); tokens.push_back(Token('\n', Token_Type::RETURN, current_line));
current_line++; current_line++;
break; break;
case Char_Type::WHITESPACE: case Char_Type::WHITESPACE:
@ -127,7 +130,7 @@ namespace NVL {
break; break;
case Char_Type::QUOTE: case Char_Type::QUOTE:
state = Token_Type::STRING; state = Token_Type::STRING;
tokens.push_back(Token(file[i], Token_Type::QUOTE)); tokens.push_back(Token(file[i], Token_Type::QUOTE, current_line));
break; break;
case Char_Type::DOT: case Char_Type::DOT:
state = Token_Type::FLOAT; state = Token_Type::FLOAT;
@ -146,18 +149,18 @@ namespace NVL {
case Char_Type::DELIM_BEG: case Char_Type::DELIM_BEG:
return { ErrCode::PARSE_ERROR, current_line }; return { ErrCode::PARSE_ERROR, current_line };
case Char_Type::DELIM_END: case Char_Type::DELIM_END:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token(file[i], Char_Type::DELIM_END)); tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::RETURN: case Char_Type::RETURN:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token('\n', Token_Type::RETURN)); tokens.push_back(Token('\n', Token_Type::RETURN, current_line));
current_line++; current_line++;
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::WHITESPACE: case Char_Type::WHITESPACE:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::SEMICOLON: case Char_Type::SEMICOLON:
@ -179,18 +182,18 @@ namespace NVL {
current += file[i]; current += file[i];
break; break;
case Char_Type::DELIM_END: case Char_Type::DELIM_END:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token(file[i], Char_Type::DELIM_END)); tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::RETURN: case Char_Type::RETURN:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token('\n', Token_Type::RETURN)); tokens.push_back(Token('\n', Token_Type::RETURN, current_line));
current_line++; current_line++;
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::WHITESPACE: case Char_Type::WHITESPACE:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::DOT: case Char_Type::DOT:
@ -214,18 +217,18 @@ namespace NVL {
case Char_Type::DELIM_BEG: case Char_Type::DELIM_BEG:
return { ErrCode::PARSE_ERROR, current_line }; return { ErrCode::PARSE_ERROR, current_line };
case Char_Type::DELIM_END: case Char_Type::DELIM_END:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token(file[i], Char_Type::DELIM_END)); tokens.push_back(Token(file[i], Char_Type::DELIM_END, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::RETURN: case Char_Type::RETURN:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token('\n', Token_Type::RETURN)); tokens.push_back(Token('\n', Token_Type::RETURN, current_line));
current_line++; current_line++;
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
case Char_Type::WHITESPACE: case Char_Type::WHITESPACE:
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
break; break;
} }
@ -234,8 +237,8 @@ namespace NVL {
if (determine_type_escaped(file[i]) == Char_Type::RETURN) if (determine_type_escaped(file[i]) == Char_Type::RETURN)
return { ErrCode::PARSE_ERROR, current_line }; return { ErrCode::PARSE_ERROR, current_line };
if (determine_type_escaped(file[i]) == Char_Type::QUOTE && current.back() != '\\') { if (determine_type_escaped(file[i]) == Char_Type::QUOTE && current.back() != '\\') {
tokens.push_back(Token(current, state)); tokens.push_back(Token(current, state, current_line));
tokens.push_back(Token('\"', Token_Type::QUOTE)); tokens.push_back(Token('\"', Token_Type::QUOTE, current_line));
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
} }
else { else {
@ -248,7 +251,7 @@ namespace NVL {
break; break;
case Token_Type::COMMENT: case Token_Type::COMMENT:
if (determine_type_escaped(file[i]) == Char_Type::RETURN) { if (determine_type_escaped(file[i]) == Char_Type::RETURN) {
tokens.push_back(Token('\n', Token_Type::RETURN)); tokens.push_back(Token('\n', Token_Type::RETURN, current_line));
current_line++; current_line++;
} }
state = Token_Type::STANDBY; state = Token_Type::STANDBY;
@ -260,59 +263,68 @@ namespace NVL {
return { ErrCode::SUCCESS, -1 }; return { ErrCode::SUCCESS, -1 };
} }
NVLError construct_tree(std::vector<Token> tokens, ASTNode& root) { namespace Parse {
Context root_context; NVLError Symbol(Token** token, Node& root) {
ASTNode* state = &root; NVLError status = { ErrCode::PARSE_ERROR, (*token)->line };
int current_line = 0;
for (int i = 0; i < tokens.size(); i++) { return status;
switch (tokens[i].type) {
case Token_Type::RETURN:
break;
case Token_Type::ID:
break;
case Token_Type::INT:
break;
case Token_Type::FLOAT:
break;
case Token_Type::BOOL:
break;
case Token_Type::SEQUENCE_BEG:
break;
case Token_Type::SEQUENCE_END:
break;
case Token_Type::DIALOGUE_BEG:
break;
case Token_Type::DIALOGUE_END:
break;
case Token_Type::LIST_BEG:
break;
case Token_Type::LIST_END:
break;
case Token_Type::QUOTE:
break;
case Token_Type::DIALOGUE:
break;
default:
return { ErrCode::SYNTAX_ERROR, current_line };
} }
NVLError Dialogue(Token** token, Node& root) {
NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line };
return status;
} }
return { ErrCode::SUCCESS, current_line }; NVLError Line(Token** token, Node& root) {
NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line };
while ((*token)->type != Token_Type::RETURN) {
if ((*token)->type == Token_Type::LIST_END) {
(*token)++;
break;
} }
if ((*token)->type == Token_Type::LIST_BEG) {
(*token)++;
status = Parse::Line(token, root);
}
else if ((*token)->type == Token_Type::DIALOGUE_BEG) {
(*token)++;
status = Parse::Dialogue(token, root);
}
else {
status = Parse::Symbol(token, root);
}
}
return status;
}
/// Placeholder for parsing the name that introduces a sequence.
///
/// Currently consumes nothing: `*token` is left where it was and `root`
/// is not touched.
///
/// @param token  Cursor into the token stream; must point at a valid token.
/// @param root   Node the parsed name would be attached to (unused for now).
/// @return Always `ErrCode::SYNTAX_ERROR`, tagged with the line of the
///         token the cursor currently points at.
NVLError SequeneceName(Token** token, Node& root) {
    const Token& current = **token;
    return { ErrCode::SYNTAX_ERROR, current.line };
}
/// Parses one sequence: its name, then lines until SEQUENCE_END.
///
/// On normal completion the cursor is left one past the SEQUENCE_END token.
///
/// @param token  Cursor into the token stream; advanced as tokens are consumed.
/// @param root   Node that the parsed content is appended to.
/// @return The status of the last `Parse::Line` call, or the initial
///         `SYNTAX_ERROR` (tagged with the starting line) if no line was parsed.
///
/// NOTE(review): the cursor carries no end-of-stream information, so a
/// sequence without a SEQUENCE_END still walks past the token buffer.
/// Callers must make sure a terminator exists before calling.
NVLError Sequenece(Token** token, Node& root) {
    NVLError status = { ErrCode::SYNTAX_ERROR, (*token)->line };
    // The result of the name parser is discarded, as before.
    SequeneceName(token, root);
    while ((*token)->type != Token_Type::SEQUENCE_END) {
        Token* const before = *token;
        status = Parse::Line(token, root);
        // Line stops *on* the RETURN that ends it without consuming it;
        // step over it here, otherwise the next Line call returns
        // immediately and this loop never terminates.
        if ((*token)->type == Token_Type::RETURN)
            (*token)++;
        // A Line call that consumed nothing cannot make progress on a
        // retry either: report its status instead of spinning forever.
        if (*token == before)
            return status;
    }
    (*token)++; // step over SEQUENCE_END
    return status;
}
}
/// Parses every sequence found in `tokens` and appends the result to `tree`.
///
/// RETURN tokens between sequences are skipped. An empty token list (or one
/// holding only RETURN tokens) is reported as success with line -1, the same
/// value `tokenize` returns on success.
///
/// @param tokens  Token stream to parse (taken by value).
/// @param tree    Node that parsed sequences are appended to.
/// @return The status of the last parsed sequence, or `SYNTAX_ERROR` tagged
///         with the line of the first token of a sequence that has no
///         SEQUENCE_END anywhere after it.
///
/// NOTE(review): statuses of earlier sequences are overwritten by later
/// ones; only the last one reaches the caller.
NVLError append_tree(std::vector<Token> tokens, Node& tree) {
    NVLError status = { ErrCode::SUCCESS, -1 };
    if (tokens.empty())
        return status; // front()/back() on an empty vector would be UB

    Token* cursor = tokens.data();
    Token* const end = cursor + tokens.size();
    while (cursor < end) {
        // Line breaks between sequences carry no content.
        if (cursor->type == Token_Type::RETURN) {
            ++cursor;
            continue;
        }

        // Parse::Sequenece has no notion of end-of-stream; refuse to call
        // it when no terminator is left, so it cannot run off the buffer
        // while searching for one.
        bool terminated = false;
        for (const Token* probe = cursor; probe < end; ++probe) {
            if (probe->type == Token_Type::SEQUENCE_END) {
                terminated = true;
                break;
            }
        }
        if (!terminated)
            return { ErrCode::SYNTAX_ERROR, cursor->line };

        Token* const before = cursor;
        status = Parse::Sequenece(&cursor, tree);
        // Sequenece already moved the cursor past what it consumed, so no
        // extra increment here. If it consumed nothing, stop rather than
        // retry the same token forever.
        if (cursor == before)
            return status;
    }
    return status;
}
} }

View file

@ -24,7 +24,8 @@ namespace NVL {
enum struct ErrCode { enum struct ErrCode {
SUCCESS, SUCCESS,
PARSE_ERROR, PARSE_ERROR,
SYNTAX_ERROR SYNTAX_ERROR,
EVAL_ERROR
}; };
struct NVLError { struct NVLError {
@ -48,6 +49,8 @@ namespace NVL {
DIALOGUE_END, DIALOGUE_END,
LIST_BEG, LIST_BEG,
LIST_END, LIST_END,
BRACKET_BEG,
BRACKET_END,
QUOTE, QUOTE,
DIALOGUE, // Do not want to deal with characters in all languages separately, essentially just strings DIALOGUE, // Do not want to deal with characters in all languages separately, essentially just strings
COMMENT COMMENT
@ -56,10 +59,11 @@ namespace NVL {
struct Token { struct Token {
std::string value; std::string value;
Token_Type type; Token_Type type;
int line;
Token(std::string str, Token_Type type) : value(str), type(type) {}; Token(std::string str, Token_Type type, int line) : value(str), type(type), line(line) {};
Token(char c, Token_Type type) : value(std::string{ c }), type(type) {}; Token(char c, Token_Type type, int line) : value(std::string{ c }), type(type), line(line) {};
Token(char c, Char_Type type); Token(char c, Char_Type type, int line);
}; };
NVLError tokenize(std::string file, std::vector<Token>& tokens); NVLError tokenize(std::string file, std::vector<Token>& tokens);
@ -73,12 +77,8 @@ namespace NVL {
LITERAL LITERAL
}; };
struct Context { struct Node {
std::deque<Node_Type> stack; std::vector<Node> children;
};
struct ASTNode {
std::vector<ASTNode> children;
Node_Type type; Node_Type type;
std::variant< std::variant<
std::string, std::string,
@ -88,9 +88,12 @@ namespace NVL {
> Value; > Value;
}; };
NVLError construct_tree(std::vector<Token> tokens, ASTNode& root); NVLError append_tree(std::vector<Token> tokens, Node& tree);
struct Tree { namespace Parse {
ASTNode root; NVLError Sequenece(Token** token, Node& root);
}; NVLError Line(Token** token, Node& root);
NVLError Dialogue(Token** token, Node& root);
NVLError Symbol(Token** token, Node& root);
}
} }

View file

@ -5,7 +5,7 @@ int main()
{ {
const std::string PJ_DIR = "E:\\Archive\\Projects\\NouVeL\\"; const std::string PJ_DIR = "E:\\Archive\\Projects\\NouVeL\\";
std::ifstream fs(PJ_DIR + "test_j.nvl"); std::ifstream fs(PJ_DIR + "test_utf8.nvl");
std::string file((std::istreambuf_iterator<char>(fs)), std::istreambuf_iterator<char>()); std::string file((std::istreambuf_iterator<char>(fs)), std::istreambuf_iterator<char>());
std::vector<NVL::Token> tokens; std::vector<NVL::Token> tokens;

View file

@ -1,3 +1,3 @@
mmawesome { mmawesome {
sadasd sadasd ["く" "ぇrち" "ゅいお" ""] sadasd sadasd ["く\"" "ぇrち"[][[[[]]]][][][] "ゅいお" ""]
} }