This repository has been archived on 2021-09-20. You can view files and clone it, but cannot push or open issues or pull requests.
reemo-gasi/GASi/Lexer.cs
2021-09-20 03:05:36 +02:00

214 lines
9.7 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace GASi {
class Lexer {
// States of the lexer's state machine. The first glyph of a token selects the
// state; the lexer returns to UNSURE once that token has been emitted.
private enum kTokenState {
    UNSURE,        // no token in progress
    ALPHA,         // word starting with a letter
    NUMERIC,       // number starting with a digit
    OPERATOR,      // operator, assigner or punctuation glyphs
    PREPROCESSOR,  // word introduced by '#'
    RAW,           // block delimited by "::"
    STRING         // text introduced by a double quote
}
// Words that are classified as SCOPE tokens.
private static readonly string[] Scopes = { "global", "static", "local" };

// Words that are classified as TYPE tokens.
private static readonly string[] Types = { "string", "date", "bool", "float", "int", "void" };

// Words that are classified as CONTROL tokens.
private static readonly string[] Controls = { "if", "else", "elseif",
                                              "switch", "case", "default",
                                              "for", "while" };

// Words that are classified as KEYWORD tokens.
private static readonly string[] Keywords = { "return", "ref", "break" };

// Symbol sequences that are classified as OPERATOR tokens.
private static readonly string[] Operators = { "+", "-", "*", "/", "%",
                                               "==", "!=", ">", ">=", "<", "<=", "&&", "||",
                                               "&", "|", "^", "~",
                                               "++", "--" };

// Symbol sequences that are classified as ASSIGNER tokens.
private static readonly string[] Assigners = { "=", "+=", "-=", "*=", "/=" };

// Every glyph that may start an operator/punctuation token. Stays null until
// AssembleSignificantCharactersTable builds it on first use, so it cannot be readonly.
private static string Significants = null;
/// <summary>
/// Returns the glyph one position after <paramref name="column"/>, or '\0'
/// when that position lies at or beyond the end of <paramref name="line"/>.
/// </summary>
private static char Peek(int column, string line) {
    var next = column + 1;
    if(next < line.Length)
        return line[next];
    return '\0';
}
/// <summary>
/// Returns the glyph one position before <paramref name="column"/>, or '\0'
/// when there is no earlier position in <paramref name="line"/>.
/// </summary>
private static char Rewind(int column, string line) {
    var previous = column - 1;
    if(previous < 0)
        return '\0';
    return line[previous];
}
/// <summary>
/// Turns program source, supplied as one string per line, into a flat token list.
/// </summary>
/// <remarks>
/// The lexer is a single-pass state machine: the first glyph of a token selects a
/// state, and each following glyph either extends the pending token or completes it.
/// A completing glyph that does not belong to the token is processed again as the
/// start of the next token. Only a raw block (delimited by "::") continues on the
/// next line; every other pending token is completed when its line ends.
/// The error produced by <c>Transpiler.Exception</c> is thrown when a glyph cannot
/// start a token or when a symbol sequence is listed in neither Operators nor Assigners.
/// </remarks>
/// <param name="rawProgramLines">Source lines without line terminators; a null entry is treated as an empty line.</param>
/// <returns>The tokens in source order, each with its value trimmed of surrounding whitespace.</returns>
/// <exception cref="ArgumentNullException"><paramref name="rawProgramLines"/> is null.</exception>
public static IEnumerable<Token> Interpret(string[] rawProgramLines) {
    if(rawProgramLines == null)
        throw new ArgumentNullException(nameof(rawProgramLines));

    List<Token> tokenList = new List<Token>();
    Token current = new Token();
    kTokenState state = kTokenState.UNSURE;
    // Line on which the pending raw block was opened; the sentinel never equals a real line index.
    var rawStartLine = -0xFF;

    for(var line = 0; line < rawProgramLines.Length; ++line) {
        var rawProgramLine = rawProgramLines[line] ?? "";
        // '#' and "::" are only honoured by the first token that is started on a line.
        bool firstCharFound = false;
        // Column of the opening "::" on this line, so the opener is not mistaken for the closer.
        var rawStartCol = -0xFF;

        for(var column = 0; column < rawProgramLine.Length; ++column) {
            var glyph = rawProgramLine[column];
            var curWord = current.Value;
            var tmpWord = current.Value + glyph;
            bool rerunGlyph = false;

            if(state == kTokenState.UNSURE) {
                // ignore tabs and spaces when nothing is being interpreted yet
                if(glyph == 0x20 || glyph == 0x09)
                    continue;
                // a "//" comment ends the line
                if(glyph == '/' && Peek(column, rawProgramLine) == '/')
                    break;
                else if(glyph == '#' && !firstCharFound)
                    state = kTokenState.PREPROCESSOR;
                else if(glyph == ':' && !firstCharFound && Peek(column, rawProgramLine) == ':') {
                    state = kTokenState.RAW;
                    rawStartCol = column;
                    rawStartLine = line;
                } else if(glyph == '"')
                    state = kTokenState.STRING;
                else if(IsAlpha(glyph))
                    state = kTokenState.ALPHA;
                else if(IsNumeric(glyph))
                    state = kTokenState.NUMERIC;
                else if(IsSignificant(glyph))
                    state = kTokenState.OPERATOR;
                else
                    throw Transpiler.Exception("Unexpected glyph " + glyph, line, column);
                firstCharFound = true;
            }

            switch(state) {
                case kTokenState.RAW:
                    // The second ':' of a "::" pair closes the block, unless it is the opening pair itself.
                    if(glyph == ':' && Rewind(column, rawProgramLine) == ':' && rawStartCol != column - 1)
                        current.Type = Token.kType.RAW;
                    else if(column == 0 && line != rawStartLine)
                        current.Value += '\n'; // re-insert the line break the input array dropped
                    break;

                case kTokenState.ALPHA:
                    if(!IsAlpha(glyph) && !IsNumeric(glyph)) {
                        current.Type = ClassifyWord(curWord);
                        rerunGlyph = true;
                    }
                    break;

                case kTokenState.NUMERIC:
                    // a single '.' is accepted as part of the number
                    if(!IsNumeric(glyph) && !(glyph == '.' && current.Value.IndexOf('.') >= 0 == false)) {
                        current.Type = Token.kType.NUMBER;
                        rerunGlyph = true;
                    }
                    break;

                case kTokenState.PREPROCESSOR:
                    // the leading '#' itself is part of the token
                    if(!IsAlpha(glyph) && !(glyph == '#' && curWord == "")) {
                        current.Type = Token.kType.PREPROC;
                        rerunGlyph = true;
                    }
                    break;

                case kTokenState.OPERATOR:
                    if(curWord != "" && "[](){}.,;".IndexOf(glyph) >= 0) {
                        // Punctuation right after a pending operator (as in "a=(b)"):
                        // finish the operator first, then lex the punctuation on its own.
                        current.Type = ClassifyOperator(curWord, line, column - curWord.Length);
                        rerunGlyph = true;
                        break;
                    }
                    switch(glyph) {
                        case '[':
                            current.Type = Token.kType.LBRACKET;
                            break;
                        case ']':
                            current.Type = Token.kType.RBRACKET;
                            break;
                        case '(':
                            current.Type = Token.kType.LPAREN;
                            break;
                        case ')':
                            current.Type = Token.kType.RPAREN;
                            break;
                        case '{':
                            current.Type = Token.kType.LBRACE;
                            break;
                        case '}':
                            current.Type = Token.kType.RBRACE;
                            break;
                        case '.':
                            current.Type = Token.kType.PERIOD;
                            break;
                        case ',':
                            current.Type = Token.kType.COMMA;
                            break;
                        case ';':
                            current.Type = Token.kType.SEMICOL;
                            break;
                        default:
                            // Keep consuming while the longer sequence is still a known symbol
                            // (e.g. "=" -> "==", "!" -> "!="); otherwise the pending symbol is complete.
                            bool extendable = Operators.Contains(tmpWord) || Assigners.Contains(tmpWord);
                            if(curWord != "" && !extendable) {
                                current.Type = ClassifyOperator(curWord, line, column - curWord.Length);
                                rerunGlyph = true;
                            }
                            break;
                    }
                    break;

                case kTokenState.STRING:
                    // TODO determine if you can escape double quotes in gas using \"
                    if(curWord != "") {
                        // closed by an unescaped quote, or implicitly by the end of the line
                        if((glyph == '"' && !curWord.EndsWith("\\")) || column == rawProgramLine.Length - 1)
                            current.Type = Token.kType.STRING;
                    }
                    break;
            }

            if(!rerunGlyph)
                current.Value += glyph;
            else
                --column; // the loop increment brings us back to the same glyph

            if(current.Type != Token.kType.UNDECIDED) {
                current.Value = current.Value.Trim();
                tokenList.Add(current);
                current = new Token();
                state = kTokenState.UNSURE;
            }
        }

        // A line break ends every token except a raw block. Without this, a word at the
        // end of one line would fuse with the start of the next, and a token at the very
        // end of the input would never be emitted.
        if(state != kTokenState.UNSURE && state != kTokenState.RAW) {
            switch(state) {
                case kTokenState.ALPHA:
                    current.Type = ClassifyWord(current.Value);
                    break;
                case kTokenState.NUMERIC:
                    current.Type = Token.kType.NUMBER;
                    break;
                case kTokenState.PREPROCESSOR:
                    current.Type = Token.kType.PREPROC;
                    break;
                case kTokenState.OPERATOR:
                    current.Type = ClassifyOperator(current.Value, line, rawProgramLine.Length - current.Value.Length);
                    break;
                case kTokenState.STRING:
                    current.Type = Token.kType.STRING;
                    break;
            }
            current.Value = current.Value.Trim();
            tokenList.Add(current);
            current = new Token();
            state = kTokenState.UNSURE;
        }
    }
    return tokenList;
}

// Maps a completed word to SCOPE, TYPE, CONTROL or KEYWORD when it is listed in the
// matching table, and to IDENTIFIER otherwise.
private static Token.kType ClassifyWord(string word) {
    var trimmed = word.Trim();
    if(Scopes.Contains(trimmed))
        return Token.kType.SCOPE;
    if(Types.Contains(trimmed))
        return Token.kType.TYPE;
    if(Controls.Contains(trimmed))
        return Token.kType.CONTROL;
    if(Keywords.Contains(trimmed))
        return Token.kType.KEYWORD;
    return Token.kType.IDENTIFIER;
}

// Maps a completed symbol sequence to OPERATOR or ASSIGNER; a sequence found in neither
// table is reported as a lexer error at the given position.
private static Token.kType ClassifyOperator(string symbol, int line, int column) {
    if(Operators.Contains(symbol))
        return Token.kType.OPERATOR;
    if(Assigners.Contains(symbol))
        return Token.kType.ASSIGNER;
    throw Transpiler.Exception("Unknown operator " + symbol, line, column);
}
/// <summary>
/// Tells whether <paramref name="glyph"/> is an ASCII letter (A-Z or a-z).
/// </summary>
private static bool IsAlpha(char glyph) {
    if(glyph >= 'A' && glyph <= 'Z')
        return true;
    return glyph >= 'a' && glyph <= 'z';
}
/// <summary>
/// Tells whether <paramref name="glyph"/> is an ASCII digit (0-9).
/// </summary>
private static bool IsNumeric(char glyph) {
    if(glyph < '0')
        return false;
    return glyph <= '9';
}
/// <summary>
/// Tells whether <paramref name="glyph"/> appears in the table of significant
/// glyphs, building that table first if it does not exist yet.
/// </summary>
private static bool IsSignificant(char glyph) {
    AssembleSignificantCharactersTable();
    var position = Significants.IndexOf(glyph);
    return position >= 0;
}
/// <summary>
/// Builds the table of significant glyphs once: the punctuation glyphs followed by
/// every distinct glyph used in Operators and Assigners. Later calls do nothing.
/// </summary>
private static void AssembleSignificantCharactersTable() {
    if(Significants == null) {
        Significants = "[]{}(),.;";
        var symbols = new List<string>(Operators);
        symbols.AddRange(Assigners);
        for(var i = 0; i < symbols.Count; ++i) {
            foreach(var candidate in symbols[i]) {
                // append each glyph only the first time it is seen
                if(Significants.IndexOf(candidate) < 0)
                    Significants += candidate;
            }
        }
    }
}
/// <summary>
/// A single lexical token: its classification and the source text it was built from.
/// </summary>
public class Token {
    /// <summary>Classification of a token. Member order defines the numeric values.</summary>
    public enum kType {
        // not classified yet
        UNDECIDED,
        // '#' words and "::" blocks
        PREPROC,
        RAW,
        // words
        SCOPE,
        TYPE,
        IDENTIFIER,
        OPERATOR,
        ASSIGNER,
        CONTROL,
        KEYWORD,
        // brackets
        LPAREN,
        RPAREN,
        LBRACKET,
        RBRACKET,
        LBRACE,
        RBRACE,
        // separators
        PERIOD,
        COMMA,
        SEMICOL,
        // literals
        NUMBER,
        STRING,
        BOOL
    }

    /// <summary>Classification of this token; UNDECIDED for a new token.</summary>
    public kType Type { get; set; } = kType.UNDECIDED;

    /// <summary>Source text of this token; empty for a new token.</summary>
    public string Value { get; set; } = "";
}
}
}